diff --git a/lab-logistic-regression-with-python.ipynb b/lab-logistic-regression-with-python.ipynb
index 05ead5e..c5abfe4 100644
--- a/lab-logistic-regression-with-python.ipynb
+++ b/lab-logistic-regression-with-python.ipynb
@@ -150,7 +150,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {
"button": false,
"new_sheet": false,
@@ -226,7 +226,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {
"button": false,
"new_sheet": false,
@@ -273,7 +273,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {
"button": false,
"new_sheet": false,
@@ -281,7 +281,207 @@
"read_only": false
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " tenure | \n",
+ " age | \n",
+ " address | \n",
+ " income | \n",
+ " ed | \n",
+ " employ | \n",
+ " equip | \n",
+ " callcard | \n",
+ " wireless | \n",
+ " longmon | \n",
+ " ... | \n",
+ " pager | \n",
+ " internet | \n",
+ " callwait | \n",
+ " confer | \n",
+ " ebill | \n",
+ " loglong | \n",
+ " logtoll | \n",
+ " lninc | \n",
+ " custcat | \n",
+ " churn | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 11.0 | \n",
+ " 33.0 | \n",
+ " 7.0 | \n",
+ " 136.0 | \n",
+ " 5.0 | \n",
+ " 5.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 4.40 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.482 | \n",
+ " 3.033 | \n",
+ " 4.913 | \n",
+ " 4.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 33.0 | \n",
+ " 33.0 | \n",
+ " 12.0 | \n",
+ " 33.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 9.45 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 2.246 | \n",
+ " 3.240 | \n",
+ " 3.497 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 23.0 | \n",
+ " 30.0 | \n",
+ " 9.0 | \n",
+ " 30.0 | \n",
+ " 1.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 6.30 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.841 | \n",
+ " 3.240 | \n",
+ " 3.401 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 38.0 | \n",
+ " 35.0 | \n",
+ " 5.0 | \n",
+ " 76.0 | \n",
+ " 2.0 | \n",
+ " 10.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 6.05 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.800 | \n",
+ " 3.807 | \n",
+ " 4.331 | \n",
+ " 4.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 7.0 | \n",
+ " 35.0 | \n",
+ " 14.0 | \n",
+ " 80.0 | \n",
+ " 2.0 | \n",
+ " 15.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 7.10 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.960 | \n",
+ " 3.091 | \n",
+ " 4.382 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 28 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " tenure age address income ed employ equip callcard wireless \\\n",
+ "0 11.0 33.0 7.0 136.0 5.0 5.0 0.0 1.0 1.0 \n",
+ "1 33.0 33.0 12.0 33.0 2.0 0.0 0.0 0.0 0.0 \n",
+ "2 23.0 30.0 9.0 30.0 1.0 2.0 0.0 0.0 0.0 \n",
+ "3 38.0 35.0 5.0 76.0 2.0 10.0 1.0 1.0 1.0 \n",
+ "4 7.0 35.0 14.0 80.0 2.0 15.0 0.0 1.0 0.0 \n",
+ "\n",
+ " longmon ... pager internet callwait confer ebill loglong logtoll \\\n",
+ "0 4.40 ... 1.0 0.0 1.0 1.0 0.0 1.482 3.033 \n",
+ "1 9.45 ... 0.0 0.0 0.0 0.0 0.0 2.246 3.240 \n",
+ "2 6.30 ... 0.0 0.0 0.0 1.0 0.0 1.841 3.240 \n",
+ "3 6.05 ... 1.0 1.0 1.0 1.0 1.0 1.800 3.807 \n",
+ "4 7.10 ... 0.0 0.0 1.0 1.0 0.0 1.960 3.091 \n",
+ "\n",
+ " lninc custcat churn \n",
+ "0 4.913 4.0 1.0 \n",
+ "1 3.497 1.0 1.0 \n",
+ "2 3.401 3.0 0.0 \n",
+ "3 4.331 4.0 0.0 \n",
+ "4 4.382 3.0 0.0 \n",
+ "\n",
+ "[5 rows x 28 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"churn_df = pd.read_csv(path)\n",
"churn_df.head()"
@@ -303,9 +503,133 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " tenure | \n",
+ " age | \n",
+ " address | \n",
+ " income | \n",
+ " ed | \n",
+ " employ | \n",
+ " equip | \n",
+ " callcard | \n",
+ " wireless | \n",
+ " churn | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 11.0 | \n",
+ " 33.0 | \n",
+ " 7.0 | \n",
+ " 136.0 | \n",
+ " 5.0 | \n",
+ " 5.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 33.0 | \n",
+ " 33.0 | \n",
+ " 12.0 | \n",
+ " 33.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 23.0 | \n",
+ " 30.0 | \n",
+ " 9.0 | \n",
+ " 30.0 | \n",
+ " 1.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 38.0 | \n",
+ " 35.0 | \n",
+ " 5.0 | \n",
+ " 76.0 | \n",
+ " 2.0 | \n",
+ " 10.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 7.0 | \n",
+ " 35.0 | \n",
+ " 14.0 | \n",
+ " 80.0 | \n",
+ " 2.0 | \n",
+ " 15.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " tenure age address income ed employ equip callcard wireless \\\n",
+ "0 11.0 33.0 7.0 136.0 5.0 5.0 0.0 1.0 1.0 \n",
+ "1 33.0 33.0 12.0 33.0 2.0 0.0 0.0 0.0 0.0 \n",
+ "2 23.0 30.0 9.0 30.0 1.0 2.0 0.0 0.0 0.0 \n",
+ "3 38.0 35.0 5.0 76.0 2.0 10.0 1.0 1.0 1.0 \n",
+ "4 7.0 35.0 14.0 80.0 2.0 15.0 0.0 1.0 0.0 \n",
+ "\n",
+ " churn \n",
+ "0 1 \n",
+ "1 1 \n",
+ "2 0 \n",
+ "3 0 \n",
+ "4 0 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"churn_df = churn_df[['tenure', 'age', 'address', 'income', 'ed', 'employ', 'equip', 'callcard', 'wireless','churn']]\n",
"churn_df['churn'] = churn_df['churn'].astype('int')\n",
@@ -329,7 +653,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {
"button": false,
"new_sheet": false,
@@ -337,9 +661,20 @@
"read_only": false
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(200, 10)"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# write your code here\n"
+ "churn_df.shape\n"
]
},
{
@@ -365,9 +700,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 11., 33., 7., 136., 5., 5., 0.],\n",
+ " [ 33., 33., 12., 33., 2., 0., 0.],\n",
+ " [ 23., 30., 9., 30., 1., 2., 0.],\n",
+ " [ 38., 35., 5., 76., 2., 10., 1.],\n",
+ " [ 7., 35., 14., 80., 2., 15., 0.]])"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"X = np.asarray(churn_df[['tenure', 'age', 'address', 'income', 'ed', 'employ', 'equip']])\n",
"X[0:5]"
@@ -375,9 +725,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 1, 0, 0, 0])"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"y = np.asarray(churn_df['churn'])\n",
"y [0:5]"
@@ -392,9 +753,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-1.13518441, -0.62595491, -0.4588971 , 0.4751423 , 1.6961288 ,\n",
+ " -0.58477841, -0.85972695],\n",
+ " [-0.11604313, -0.62595491, 0.03454064, -0.32886061, -0.6433592 ,\n",
+ " -1.14437497, -0.85972695],\n",
+ " [-0.57928917, -0.85594447, -0.261522 , -0.35227817, -1.42318853,\n",
+ " -0.92053635, -0.85972695],\n",
+ " [ 0.11557989, -0.47262854, -0.65627219, 0.00679109, -0.6433592 ,\n",
+ " -0.02518185, 1.16316 ],\n",
+ " [-1.32048283, -0.47262854, 0.23191574, 0.03801451, -0.6433592 ,\n",
+ " 0.53441472, -0.85972695]])"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from sklearn import preprocessing\n",
"X = preprocessing.StandardScaler().fit(X).transform(X)\n",
@@ -417,9 +798,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Train set: (160, 7) (160,)\n",
+ "Test set: (40, 7) (40,)\n"
+ ]
+ }
+ ],
"source": [
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=4)\n",
@@ -447,9 +837,427 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "LogisticRegression(C=0.01, solver='liblinear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
+ ],
+ "text/plain": [
+ "LogisticRegression(C=0.01, solver='liblinear')"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import confusion_matrix\n",
@@ -466,9 +1274,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,\n",
+ " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0])"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"yhat = LR.predict(X_test)\n",
"yhat"
@@ -483,9 +1303,59 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[0.54132919, 0.45867081],\n",
+ " [0.60593357, 0.39406643],\n",
+ " [0.56277713, 0.43722287],\n",
+ " [0.63432489, 0.36567511],\n",
+ " [0.56431839, 0.43568161],\n",
+ " [0.55386646, 0.44613354],\n",
+ " [0.52237207, 0.47762793],\n",
+ " [0.60514349, 0.39485651],\n",
+ " [0.41069572, 0.58930428],\n",
+ " [0.6333873 , 0.3666127 ],\n",
+ " [0.58068791, 0.41931209],\n",
+ " [0.62768628, 0.37231372],\n",
+ " [0.47559883, 0.52440117],\n",
+ " [0.4267593 , 0.5732407 ],\n",
+ " [0.66172417, 0.33827583],\n",
+ " [0.55092315, 0.44907685],\n",
+ " [0.51749946, 0.48250054],\n",
+ " [0.485743 , 0.514257 ],\n",
+ " [0.49011451, 0.50988549],\n",
+ " [0.52423349, 0.47576651],\n",
+ " [0.61619519, 0.38380481],\n",
+ " [0.52696302, 0.47303698],\n",
+ " [0.63957168, 0.36042832],\n",
+ " [0.52205164, 0.47794836],\n",
+ " [0.50572852, 0.49427148],\n",
+ " [0.70706202, 0.29293798],\n",
+ " [0.55266286, 0.44733714],\n",
+ " [0.52271594, 0.47728406],\n",
+ " [0.51638863, 0.48361137],\n",
+ " [0.71331391, 0.28668609],\n",
+ " [0.67862111, 0.32137889],\n",
+ " [0.50896403, 0.49103597],\n",
+ " [0.42348082, 0.57651918],\n",
+ " [0.71495838, 0.28504162],\n",
+ " [0.59711064, 0.40288936],\n",
+ " [0.63808839, 0.36191161],\n",
+ " [0.39957895, 0.60042105],\n",
+ " [0.52127638, 0.47872362],\n",
+ " [0.65975464, 0.34024536],\n",
+ " [0.5114172 , 0.4885828 ]])"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"yhat_prob = LR.predict_proba(X_test)\n",
"yhat_prob"
@@ -509,9 +1379,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.7058823529411765"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from sklearn.metrics import jaccard_score\n",
"jaccard_score(y_test, yhat,pos_label=0)"
@@ -528,9 +1409,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[ 6 9]\n",
+ " [ 1 24]]\n"
+ ]
+ }
+ ],
"source": [
"from sklearn.metrics import classification_report, confusion_matrix\n",
"import itertools\n",
@@ -572,9 +1462,19 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Confusion matrix, without normalization\n",
+ "[[ 6 9]\n",
+ " [ 1 24]]\n"
+ ]
+ }
+ ],
"source": [
"# Compute confusion matrix\n",
"cnf_matrix = confusion_matrix(y_test, yhat, labels=[1,0])\n",
@@ -604,9 +1504,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.73 0.96 0.83 25\n",
+ " 1 0.86 0.40 0.55 15\n",
+ "\n",
+ " accuracy 0.75 40\n",
+ " macro avg 0.79 0.68 0.69 40\n",
+ "weighted avg 0.78 0.75 0.72 40\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"print (classification_report(y_test, yhat))\n"
]
@@ -643,9 +1559,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.6017092478101185"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from sklearn.metrics import log_loss\n",
"log_loss(y_test, yhat_prob)"
@@ -664,10 +1591,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "# write your code here\n",
- "\n"
- ]
+ "source": []
},
{
"cell_type": "markdown",
@@ -695,7 +1619,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -709,7 +1633,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.8"
+ "version": "3.12.2"
},
"widgets": {
"state": {},