From 99d725d612a5f549d6e220d9308ab9e1ae0d3641 Mon Sep 17 00:00:00 2001 From: Wejdan Alharthi <101019204+WejdanMjd@users.noreply.github.com> Date: Mon, 3 Mar 2025 21:21:16 +0300 Subject: [PATCH] Add files via upload --- lab-logistic-regression-with-python.ipynb | 1085 ++++++++++++++++++++- 1 file changed, 1031 insertions(+), 54 deletions(-) diff --git a/lab-logistic-regression-with-python.ipynb b/lab-logistic-regression-with-python.ipynb index 05ead5e..5b8f687 100644 --- a/lab-logistic-regression-with-python.ipynb +++ b/lab-logistic-regression-with-python.ipynb @@ -123,9 +123,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'piplite'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpiplite\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m piplite\u001b[38;5;241m.\u001b[39minstall([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpandas\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m piplite\u001b[38;5;241m.\u001b[39minstall([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmatplotlib\u001b[39m\u001b[38;5;124m'\u001b[39m])\n", + "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'piplite'" + ] + } + ], "source": [ "import piplite\n", "await piplite.install(['pandas'])\n", @@ -150,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "button": false, "new_sheet": false, @@ -171,9 +183,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'pyodide'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[4], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpyodide\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhttp\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pyfetch\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdownload\u001b[39m(url, filename):\n\u001b[0;32m 4\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m pyfetch(url)\n", + "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'pyodide'" + ] + } + ], "source": [ "from pyodide.http import pyfetch\n", "\n", @@ -226,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "button": false, "new_sheet": false, @@ -262,18 +286,7 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "await download(path, \"ChurnData.csv\")\n", - "path=\"ChurnData.csv\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "button": false, "new_sheet": false, @@ -281,9 +294,209 @@ "read_only": false } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tenureageaddressincomeedemployequipcallcardwirelesslongmon...pagerinternetcallwaitconferebillloglonglogtolllninccustcatchurn
011.033.07.0136.05.05.00.01.01.04.40...1.00.01.01.00.01.4823.0334.9134.01.0
133.033.012.033.02.00.00.00.00.09.45...0.00.00.00.00.02.2463.2403.4971.01.0
223.030.09.030.01.02.00.00.00.06.30...0.00.00.01.00.01.8413.2403.4013.00.0
338.035.05.076.02.010.01.01.01.06.05...1.01.01.01.01.01.8003.8074.3314.00.0
47.035.014.080.02.015.00.01.00.07.10...0.00.01.01.00.01.9603.0914.3823.00.0
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " tenure age address income ed employ equip callcard wireless \\\n", + "0 11.0 33.0 7.0 136.0 5.0 5.0 0.0 1.0 1.0 \n", + "1 33.0 33.0 12.0 33.0 2.0 0.0 0.0 0.0 0.0 \n", + "2 23.0 30.0 9.0 30.0 1.0 2.0 0.0 0.0 0.0 \n", + "3 38.0 35.0 5.0 76.0 2.0 10.0 1.0 1.0 1.0 \n", + "4 7.0 35.0 14.0 80.0 2.0 15.0 0.0 1.0 0.0 \n", + "\n", + " longmon ... pager internet callwait confer ebill loglong logtoll \\\n", + "0 4.40 ... 1.0 0.0 1.0 1.0 0.0 1.482 3.033 \n", + "1 9.45 ... 0.0 0.0 0.0 0.0 0.0 2.246 3.240 \n", + "2 6.30 ... 0.0 0.0 0.0 1.0 0.0 1.841 3.240 \n", + "3 6.05 ... 1.0 1.0 1.0 1.0 1.0 1.800 3.807 \n", + "4 7.10 ... 0.0 0.0 1.0 1.0 0.0 1.960 3.091 \n", + "\n", + " lninc custcat churn \n", + "0 4.913 4.0 1.0 \n", + "1 3.497 1.0 1.0 \n", + "2 3.401 3.0 0.0 \n", + "3 4.331 4.0 0.0 \n", + "4 4.382 3.0 0.0 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "churn_df = pd.read_csv(path)\n", + "churn_df = pd.read_csv(r\"C:\\Users\\HUAWI\\Downloads\\ChurnData.csv\")\n", "churn_df.head()" ] }, @@ -303,9 +516,133 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tenureageaddressincomeedemployequipcallcardwirelesschurn
011.033.07.0136.05.05.00.01.01.01
133.033.012.033.02.00.00.00.00.01
223.030.09.030.01.02.00.00.00.00
338.035.05.076.02.010.01.01.01.00
47.035.014.080.02.015.00.01.00.00
\n", + "
" + ], + "text/plain": [ + " tenure age address income ed employ equip callcard wireless \\\n", + "0 11.0 33.0 7.0 136.0 5.0 5.0 0.0 1.0 1.0 \n", + "1 33.0 33.0 12.0 33.0 2.0 0.0 0.0 0.0 0.0 \n", + "2 23.0 30.0 9.0 30.0 1.0 2.0 0.0 0.0 0.0 \n", + "3 38.0 35.0 5.0 76.0 2.0 10.0 1.0 1.0 1.0 \n", + "4 7.0 35.0 14.0 80.0 2.0 15.0 0.0 1.0 0.0 \n", + "\n", + " churn \n", + "0 1 \n", + "1 1 \n", + "2 0 \n", + "3 0 \n", + "4 0 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "churn_df = churn_df[['tenure', 'age', 'address', 'income', 'ed', 'employ', 'equip', 'callcard', 'wireless','churn']]\n", "churn_df['churn'] = churn_df['churn'].astype('int')\n", @@ -329,7 +666,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "button": false, "new_sheet": false, @@ -337,9 +674,34 @@ "read_only": false } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 200 entries, 0 to 199\n", + "Data columns (total 10 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 tenure 200 non-null float64\n", + " 1 age 200 non-null float64\n", + " 2 address 200 non-null float64\n", + " 3 income 200 non-null float64\n", + " 4 ed 200 non-null float64\n", + " 5 employ 200 non-null float64\n", + " 6 equip 200 non-null float64\n", + " 7 callcard 200 non-null float64\n", + " 8 wireless 200 non-null float64\n", + " 9 churn 200 non-null int32 \n", + "dtypes: float64(9), int32(1)\n", + "memory usage: 15.0 KB\n" + ] + } + ], "source": [ - "# write your code here\n" + "# write your code here\n", + "churn_df.info()\n" ] }, { @@ -365,9 +727,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 11., 33., 7., 136., 5., 5., 0.],\n", + " [ 33., 33., 12., 33., 2., 0., 0.],\n", + " [ 23., 30., 9., 30., 1., 2., 0.],\n", + " [ 38., 35., 5., 76., 2., 10., 1.],\n", + " [ 7., 35., 14., 80., 2., 15., 0.]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "X = np.asarray(churn_df[['tenure', 'age', 'address', 'income', 'ed', 'employ', 'equip']])\n", "X[0:5]" @@ -375,9 +752,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 0, 0, 0])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "y = np.asarray(churn_df['churn'])\n", "y [0:5]" @@ -392,9 +780,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-1.13518441, -0.62595491, -0.4588971 , 0.4751423 , 1.6961288 ,\n", + " -0.58477841, -0.85972695],\n", + " [-0.11604313, -0.62595491, 0.03454064, -0.32886061, -0.6433592 ,\n", + " -1.14437497, -0.85972695],\n", + " [-0.57928917, -0.85594447, -0.261522 , -0.35227817, -1.42318853,\n", + " -0.92053635, -0.85972695],\n", + " [ 0.11557989, -0.47262854, -0.65627219, 0.00679109, -0.6433592 ,\n", + " -0.02518185, 1.16316 ],\n", + " [-1.32048283, -0.47262854, 0.23191574, 0.03801451, -0.6433592 ,\n", + " 0.53441472, -0.85972695]])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn import preprocessing\n", "X = preprocessing.StandardScaler().fit(X).transform(X)\n", @@ -417,9 +825,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train set: (160, 7) (160,)\n", + "Test set: (40, 7) (40,)\n" + ] + } + ], "source": [ "from sklearn.model_selection import train_test_split\n", "X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=4)\n", @@ -447,9 +864,438 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
LogisticRegression(C=0.01, solver='liblinear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LogisticRegression(C=0.01, solver='liblinear')" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import confusion_matrix\n", @@ -466,9 +1312,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "yhat = LR.predict(X_test)\n", "yhat" @@ -483,9 +1341,59 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.54132919, 0.45867081],\n", + " [0.60593357, 0.39406643],\n", + " [0.56277713, 0.43722287],\n", + " [0.63432489, 0.36567511],\n", + " [0.56431839, 0.43568161],\n", + " [0.55386646, 0.44613354],\n", + " [0.52237207, 0.47762793],\n", + " [0.60514349, 0.39485651],\n", + " [0.41069572, 0.58930428],\n", + " [0.6333873 , 0.3666127 ],\n", + " [0.58068791, 0.41931209],\n", + " [0.62768628, 0.37231372],\n", + " [0.47559883, 0.52440117],\n", + " [0.4267593 , 0.5732407 ],\n", + " [0.66172417, 0.33827583],\n", + " [0.55092315, 0.44907685],\n", + " [0.51749946, 0.48250054],\n", + " [0.485743 , 0.514257 ],\n", + " [0.49011451, 0.50988549],\n", + " [0.52423349, 0.47576651],\n", + " [0.61619519, 0.38380481],\n", + " [0.52696302, 0.47303698],\n", + " [0.63957168, 0.36042832],\n", + " [0.52205164, 0.47794836],\n", + " [0.50572852, 0.49427148],\n", + " [0.70706202, 0.29293798],\n", + " [0.55266286, 0.44733714],\n", + " [0.52271594, 0.47728406],\n", + " [0.51638863, 0.48361137],\n", + " [0.71331391, 0.28668609],\n", + " [0.67862111, 0.32137889],\n", + " [0.50896403, 0.49103597],\n", + " [0.42348082, 0.57651918],\n", + " [0.71495838, 0.28504162],\n", + " [0.59711064, 0.40288936],\n", + " [0.63808839, 0.36191161],\n", + " [0.39957895, 0.60042105],\n", + " [0.52127638, 0.47872362],\n", + " [0.65975464, 0.34024536],\n", + " [0.5114172 , 0.4885828 ]])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "yhat_prob = LR.predict_proba(X_test)\n", "yhat_prob" @@ -509,9 +1417,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.7058823529411765" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.metrics import jaccard_score\n", "jaccard_score(y_test, yhat,pos_label=0)" @@ -528,9 +1447,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 6 9]\n", + " [ 1 24]]\n" + ] + } + ], "source": [ "from sklearn.metrics import classification_report, confusion_matrix\n", "import itertools\n", @@ -572,9 +1500,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confusion matrix, without normalization\n", + "[[ 6 9]\n", + " [ 1 24]]\n" + ] + } + ], "source": [ "# Compute confusion matrix\n", "cnf_matrix = confusion_matrix(y_test, yhat, labels=[1,0])\n", @@ -604,9 +1542,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.73 0.96 0.83 25\n", + " 1 0.86 0.40 0.55 15\n", + "\n", + " accuracy 0.75 40\n", + " macro avg 0.79 0.68 0.69 40\n", + "weighted avg 0.78 0.75 0.72 40\n", + "\n" + ] + } + ], "source": [ "print (classification_report(y_test, yhat))\n" ] @@ -643,9 +1597,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.6017092478101186" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.metrics import log_loss\n", "log_loss(y_test, yhat_prob)" @@ -661,12 +1626,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LogLoss: :0.591816\n" + ] + } + ], "source": [ "# write your code here\n", - "\n" + "LR_new = LogisticRegression(C=0.1, solver='newton-cg').fit(X_train, y_train)\n", + "yhat_prob_new = LR_new.predict_proba(X_test)\n", + "log_loss_new = log_loss(y_test, yhat_prob_new)\n", + "\n", + "print(\"LogLoss: :%2f\" %log_loss(y_test,yhat_prob_new) )\n" ] }, { @@ -695,7 +1672,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -709,7 +1686,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.12.7" }, "widgets": { "state": {},