diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ec7fd08 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +ChurnData.csv diff --git a/lab-logistic-regression-with-python.ipynb b/lab-logistic-regression-with-python.ipynb index 05ead5e..72b53fe 100644 --- a/lab-logistic-regression-with-python.ipynb +++ b/lab-logistic-regression-with-python.ipynb @@ -123,16 +123,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "import piplite\n", - "await piplite.install(['pandas'])\n", - "await piplite.install(['matplotlib'])\n", - "await piplite.install(['numpy'])\n", - "await piplite.install(['scikit-learn'])\n", - "await piplite.install(['scipy'])\n" + "# import piplite\n", + "# await piplite.install(['pandas'])\n", + "# await piplite.install(['matplotlib'])\n", + "# await piplite.install(['numpy'])\n", + "# await piplite.install(['scikit-learn'])\n", + "# await piplite.install(['scipy'])\n" ] }, { @@ -150,7 +150,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "button": false, "new_sheet": false, @@ -171,17 +171,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "from pyodide.http import pyfetch\n", + "import urllib.request # Add this import\n", "\n", - "async def download(url, filename):\n", - " response = await pyfetch(url)\n", - " if response.status == 200:\n", - " with open(filename, \"wb\") as f:\n", - " f.write(await response.bytes())\n" + "def download(url, filename):\n", + " print(f\"Downloading {url} to {filename} using urllib.request...\")\n", + " try:\n", + " urllib.request.urlretrieve(url, filename)\n", + " print(f\"Successfully downloaded {filename}.\")\n", + " except Exception as e:\n", + " print(f\"Failed to download {filename}. Error: {e}\")\n", + " # Depending on your needs, you might want to re-raise the exception\n", + " # or handle it more gracefully.\n", + " # raise\n" ] }, { @@ -226,7 +231,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "button": false, "new_sheet": false, @@ -262,9 +267,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillsNetwork/labs/Module%203/data/ChurnData.csv to ChurnData.csv using urllib.request...\n", + "Successfully downloaded ChurnData.csv.\n" + ] + }, + { + "ename": "TypeError", + "evalue": "object NoneType can't be used in 'await' expression", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m download(path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mChurnData.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 2\u001b[0m path\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mChurnData.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", + "\u001b[0;31mTypeError\u001b[0m: object NoneType can't be used in 'await' expression" + ] + } + ], "source": [ "\n", "await download(path, \"ChurnData.csv\")\n", @@ -273,7 +298,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "button": false, "new_sheet": false, @@ -281,7 +306,207 @@ "read_only": false } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tenureageaddressincomeedemployequipcallcardwirelesslongmon...pagerinternetcallwaitconferebillloglonglogtolllninccustcatchurn
011.033.07.0136.05.05.00.01.01.04.40...1.00.01.01.00.01.4823.0334.9134.01.0
133.033.012.033.02.00.00.00.00.09.45...0.00.00.00.00.02.2463.2403.4971.01.0
223.030.09.030.01.02.00.00.00.06.30...0.00.00.01.00.01.8413.2403.4013.00.0
338.035.05.076.02.010.01.01.01.06.05...1.01.01.01.01.01.8003.8074.3314.00.0
47.035.014.080.02.015.00.01.00.07.10...0.00.01.01.00.01.9603.0914.3823.00.0
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " tenure age address income ed employ equip callcard wireless \\\n", + "0 11.0 33.0 7.0 136.0 5.0 5.0 0.0 1.0 1.0 \n", + "1 33.0 33.0 12.0 33.0 2.0 0.0 0.0 0.0 0.0 \n", + "2 23.0 30.0 9.0 30.0 1.0 2.0 0.0 0.0 0.0 \n", + "3 38.0 35.0 5.0 76.0 2.0 10.0 1.0 1.0 1.0 \n", + "4 7.0 35.0 14.0 80.0 2.0 15.0 0.0 1.0 0.0 \n", + "\n", + " longmon ... pager internet callwait confer ebill loglong logtoll \\\n", + "0 4.40 ... 1.0 0.0 1.0 1.0 0.0 1.482 3.033 \n", + "1 9.45 ... 0.0 0.0 0.0 0.0 0.0 2.246 3.240 \n", + "2 6.30 ... 0.0 0.0 0.0 1.0 0.0 1.841 3.240 \n", + "3 6.05 ... 1.0 1.0 1.0 1.0 1.0 1.800 3.807 \n", + "4 7.10 ... 0.0 0.0 1.0 1.0 0.0 1.960 3.091 \n", + "\n", + " lninc custcat churn \n", + "0 4.913 4.0 1.0 \n", + "1 3.497 1.0 1.0 \n", + "2 3.401 3.0 0.0 \n", + "3 4.331 4.0 0.0 \n", + "4 4.382 3.0 0.0 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "churn_df = pd.read_csv(path)\n", "churn_df.head()" @@ -303,9 +528,133 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tenureageaddressincomeedemployequipcallcardwirelesschurn
011.033.07.0136.05.05.00.01.01.01
133.033.012.033.02.00.00.00.00.01
223.030.09.030.01.02.00.00.00.00
338.035.05.076.02.010.01.01.01.00
47.035.014.080.02.015.00.01.00.00
\n", + "
" + ], + "text/plain": [ + " tenure age address income ed employ equip callcard wireless \\\n", + "0 11.0 33.0 7.0 136.0 5.0 5.0 0.0 1.0 1.0 \n", + "1 33.0 33.0 12.0 33.0 2.0 0.0 0.0 0.0 0.0 \n", + "2 23.0 30.0 9.0 30.0 1.0 2.0 0.0 0.0 0.0 \n", + "3 38.0 35.0 5.0 76.0 2.0 10.0 1.0 1.0 1.0 \n", + "4 7.0 35.0 14.0 80.0 2.0 15.0 0.0 1.0 0.0 \n", + "\n", + " churn \n", + "0 1 \n", + "1 1 \n", + "2 0 \n", + "3 0 \n", + "4 0 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "churn_df = churn_df[['tenure', 'age', 'address', 'income', 'ed', 'employ', 'equip', 'callcard', 'wireless','churn']]\n", "churn_df['churn'] = churn_df['churn'].astype('int')\n", @@ -329,7 +678,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "button": false, "new_sheet": false, @@ -337,9 +686,21 @@ "read_only": false } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(200, 10)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# write your code here\n" + "# write your code here\n", + "churn_df.shape" ] }, { @@ -365,9 +726,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 11., 33., 7., 136., 5., 5., 0.],\n", + " [ 33., 33., 12., 33., 2., 0., 0.],\n", + " [ 23., 30., 9., 30., 1., 2., 0.],\n", + " [ 38., 35., 5., 76., 2., 10., 1.],\n", + " [ 7., 35., 14., 80., 2., 15., 0.]])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "X = np.asarray(churn_df[['tenure', 'age', 'address', 'income', 'ed', 'employ', 'equip']])\n", "X[0:5]" @@ -375,9 +751,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 0, 0, 0])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "y = np.asarray(churn_df['churn'])\n", "y [0:5]" @@ -392,9 +779,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-1.13518441, -0.62595491, -0.4588971 , 0.4751423 , 1.6961288 ,\n", + " -0.58477841, -0.85972695],\n", + " [-0.11604313, -0.62595491, 0.03454064, -0.32886061, -0.6433592 ,\n", + " -1.14437497, -0.85972695],\n", + " [-0.57928917, -0.85594447, -0.261522 , -0.35227817, -1.42318853,\n", + " -0.92053635, -0.85972695],\n", + " [ 0.11557989, -0.47262854, -0.65627219, 0.00679109, -0.6433592 ,\n", + " -0.02518185, 1.16316 ],\n", + " [-1.32048283, -0.47262854, 0.23191574, 0.03801451, -0.6433592 ,\n", + " 0.53441472, -0.85972695]])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn import preprocessing\n", "X = preprocessing.StandardScaler().fit(X).transform(X)\n", @@ -417,9 +824,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train set: (160, 7) (160,)\n", + "Test set: (40, 7) (40,)\n" + ] + } + ], "source": [ "from sklearn.model_selection import train_test_split\n", "X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=4)\n", @@ -447,9 +863,427 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
LogisticRegression(C=0.01, solver='liblinear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LogisticRegression(C=0.01, solver='liblinear')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import confusion_matrix\n", @@ -466,9 +1300,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "yhat = LR.predict(X_test)\n", "yhat" @@ -483,9 +1329,59 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.54132919, 0.45867081],\n", + " [0.60593357, 0.39406643],\n", + " [0.56277713, 0.43722287],\n", + " [0.63432489, 0.36567511],\n", + " [0.56431839, 0.43568161],\n", + " [0.55386646, 0.44613354],\n", + " [0.52237207, 0.47762793],\n", + " [0.60514349, 0.39485651],\n", + " [0.41069572, 0.58930428],\n", + " [0.6333873 , 0.3666127 ],\n", + " [0.58068791, 0.41931209],\n", + " [0.62768628, 0.37231372],\n", + " [0.47559883, 0.52440117],\n", + " [0.4267593 , 0.5732407 ],\n", + " [0.66172417, 0.33827583],\n", + " [0.55092315, 0.44907685],\n", + " [0.51749946, 0.48250054],\n", + " [0.485743 , 0.514257 ],\n", + " [0.49011451, 0.50988549],\n", + " [0.52423349, 0.47576651],\n", + " [0.61619519, 0.38380481],\n", + " [0.52696302, 0.47303698],\n", + " [0.63957168, 0.36042832],\n", + " [0.52205164, 0.47794836],\n", + " [0.50572852, 0.49427148],\n", + " [0.70706202, 0.29293798],\n", + " [0.55266286, 0.44733714],\n", + " [0.52271594, 0.47728406],\n", + " [0.51638863, 0.48361137],\n", + " [0.71331391, 0.28668609],\n", + " [0.67862111, 0.32137889],\n", + " [0.50896403, 0.49103597],\n", + " [0.42348082, 0.57651918],\n", + " [0.71495838, 0.28504162],\n", + " [0.59711064, 0.40288936],\n", + " [0.63808839, 0.36191161],\n", + " [0.39957895, 0.60042105],\n", + " [0.52127638, 0.47872362],\n", + " [0.65975464, 0.34024536],\n", + " [0.5114172 , 0.4885828 ]])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "yhat_prob = LR.predict_proba(X_test)\n", "yhat_prob" @@ -509,9 +1405,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.7058823529411765" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.metrics import jaccard_score\n", "jaccard_score(y_test, yhat,pos_label=0)" @@ -528,9 +1435,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 6 9]\n", + " [ 1 24]]\n" + ] + } + ], "source": [ "from sklearn.metrics import classification_report, confusion_matrix\n", "import itertools\n", @@ -572,9 +1488,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confusion matrix, without normalization\n", + "[[ 6 9]\n", + " [ 1 24]]\n" + ] + } + ], "source": [ "# Compute confusion matrix\n", "cnf_matrix = confusion_matrix(y_test, yhat, labels=[1,0])\n", @@ -604,9 +1530,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.73 0.96 0.83 25\n", + " 1 0.86 0.40 0.55 15\n", + "\n", + " accuracy 0.75 40\n", + " macro avg 0.79 0.68 0.69 40\n", + "weighted avg 0.78 0.75 0.72 40\n", + "\n" + ] + } + ], "source": [ "print (classification_report(y_test, yhat))\n" ] @@ -643,9 +1585,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.6017092478101185" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.metrics import log_loss\n", "log_loss(y_test, yhat_prob)" @@ -661,12 +1614,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LogLoss: : 0.61\n" + ] + } + ], "source": [ "# write your code here\n", - "\n" + "LR2 = LogisticRegression(C=0.01, solver='sag').fit(X_train,y_train)\n", + "yhat_prob2 = LR2.predict_proba(X_test)\n", + "print (\"LogLoss: : %.2f\" % log_loss(y_test, yhat_prob2))\n" ] }, { @@ -695,7 +1658,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -709,7 +1672,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.12.7" }, "widgets": { "state": {},