@@ -6,12 +6,16 @@
"metadata" : {},
"outputs" : [],
"source" : [
" import pandas as pd"
" import pandas as pd\n " ,
" import numpy as np\n " ,
" import math\n " ,
" from datetime import datetime\n " ,
" from sklearn.linear_model import LinearRegression"
]
},
{
"cell_type" : " code" ,
"execution_count" : 3 ,
"execution_count" : 17 ,
"metadata" : {},
"outputs" : [],
"source" : [
@@ -29,42 +33,390 @@
},
{
"cell_type" : " code" ,
"execution_count" : 11 ,
"execution_count" : 3 ,
"metadata" : {},
"outputs" : [
{
"data" : {
"text/plain" : [
" 16042239 "
" (100000, 25) "
]
},
"execution_count" : 11 ,
"execution_count" : 3 ,
"metadata" : {},
"output_type" : " execute_result"
}
],
"source" : [
" df['normTitleCategory'].count()"
"# Load the US sample extract and report its (rows, columns)\n",
"sample_path = 'data/sample1.csv'\n",
"df = pd.read_csv(sample_path)\n",
"df.shape"
]
},
{
"cell_type" : " code" ,
"execution_count" : 14 ,
"execution_count" : 95 ,
"metadata" : {},
"outputs" : [
{
"ename" : " AttributeError" ,
"evalue" : " 'Series' object has no attribute 'columns'" ,
"output_type" : " error" ,
"traceback" : [
" \u001b [1;31m---------------------------------------------------------------------------\u001b [0m" ,
" \u001b [1;31mAttributeError\u001b [0m Traceback (most recent call last)" ,
" \u001b [1;32m<ipython-input-95-4d542a71f7a6>\u001b [0m in \u001b [0;36m<module>\u001b [1;34m()\u001b [0m\n \u001b [1;32m----> 1\u001b [1;33m \u001b [0mdf\u001b [0m\u001b [1;33m[\u001b [0m\u001b [1;34m'normTitleCategory'\u001b [0m\u001b [1;33m]\u001b [0m\u001b [1;33m.\u001b [0m\u001b [0mcolumns\u001b [0m\u001b [1;33m.\u001b [0m\u001b [0mvalues\u001b [0m\u001b [1;33m\u001b [0m\u001b [0m\n \u001b [0m" ,
" \u001b [1;32m~\\ Miniconda3\\ envs\\ Datafest2018\\ lib\\ site-packages\\ pandas\\ core\\ generic.py\u001b [0m in \u001b [0;36m__getattr__\u001b [1;34m(self, name)\u001b [0m\n \u001b [0;32m 3608\u001b [0m if (name in self._internal_names_set or name in self._metadata or\n \u001b [0;32m 3609\u001b [0m name in self._accessors):\n \u001b [1;32m-> 3610\u001b [1;33m \u001b [1;32mreturn\u001b [0m \u001b [0mobject\u001b [0m\u001b [1;33m.\u001b [0m\u001b [0m__getattribute__\u001b [0m\u001b [1;33m(\u001b [0m\u001b [0mself\u001b [0m\u001b [1;33m,\u001b [0m \u001b [0mname\u001b [0m\u001b [1;33m)\u001b [0m\u001b [1;33m\u001b [0m\u001b [0m\n \u001b [0m\u001b [0;32m 3611\u001b [0m \u001b [1;32melse\u001b [0m\u001b [1;33m:\u001b [0m\u001b [1;33m\u001b [0m\u001b [0m\n \u001b [0;32m 3612\u001b [0m \u001b [1;32mif\u001b [0m \u001b [0mname\u001b [0m \u001b [1;32min\u001b [0m \u001b [0mself\u001b [0m\u001b [1;33m.\u001b [0m\u001b [0m_info_axis\u001b [0m\u001b [1;33m:\u001b [0m\u001b [1;33m\u001b [0m\u001b [0m\n " ,
" \u001b [1;31mAttributeError\u001b [0m: 'Series' object has no attribute 'columns'"
]
}
],
"source" : [
" df['normTitleCategory'].columns.values"
]
},
{
"cell_type" : " code" ,
"execution_count" : 30 ,
"metadata" : {},
"outputs" : [
{
"data" : {
"text/plain" : [
" 22217"
" companyId jobId \n " ,
" company00003 job0001121 2016-10-06, 2016-10-10\n " ,
" job0005179 2016-11-06\n " ,
" job0010571 2016-11-21, 2016-11-28\n " ,
" job0021975 2017-01-31, 2017-03-08\n " ,
" job0023964 2017-03-04, 2017-02-03\n " ,
" job0025943 2017-04-03\n " ,
" job0026922 2017-03-13, 2017-03-21, 2017-03-31, 2017-03-14\n " ,
" job0054913 2017-08-31, 2017-06-25, 2017-09-03\n " ,
" job0056115 2017-08-05\n " ,
" job0067749 2017-09-09, 2017-09-07\n " ,
" Name: date, dtype: object"
]
},
"execution_count" : 14 ,
"execution_count" : 30 ,
"metadata" : {},
"output_type" : " execute_result"
}
],
"source" : [
" df = df['']"
"# One row per (companyId, jobId) with every date it appeared on, comma-separated\n",
"dates_by_listing = df_small.groupby(['companyId', 'jobId'])['date']\n",
"listings = dates_by_listing.apply(', '.join).reset_index()\n",
"listings.head(10)"
]
},
{
"cell_type" : " code" ,
"execution_count" : 6 ,
"metadata" : {},
"outputs" : [
{
"name" : " stdout" ,
"output_type" : " stream" ,
"text" : [
" Finished calculating\n " ,
" Finished joining\n " ,
" Finished aggregating\n "
]
},
{
"data" : {
"text/html" : [
" <div>\n " ,
" <style scoped>\n " ,
" .dataframe tbody tr th:only-of-type {\n " ,
" vertical-align: middle;\n " ,
" }\n " ,
" \n " ,
" .dataframe tbody tr th {\n " ,
" vertical-align: top;\n " ,
" }\n " ,
" \n " ,
" .dataframe thead th {\n " ,
" text-align: right;\n " ,
" }\n " ,
" </style>\n " ,
" <table border=\" 1\" class=\" dataframe\" >\n " ,
" <thead>\n " ,
" <tr style=\" text-align: right;\" >\n " ,
" <th></th>\n " ,
" <th>city</th>\n " ,
" <th>stateProvince</th>\n " ,
" <th>TotalClicks</th>\n " ,
" <th>AvgDays</th>\n " ,
" </tr>\n " ,
" </thead>\n " ,
" <tbody>\n " ,
" <tr>\n " ,
" <th>0</th>\n " ,
" <td>29 Palms</td>\n " ,
" <td>CA</td>\n " ,
" <td>60</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>1</th>\n " ,
" <td>Abbeville</td>\n " ,
" <td>LA</td>\n " ,
" <td>111</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>2</th>\n " ,
" <td>Abbeville</td>\n " ,
" <td>SC</td>\n " ,
" <td>9</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>3</th>\n " ,
" <td>Abbotsford</td>\n " ,
" <td>WI</td>\n " ,
" <td>27</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>4</th>\n " ,
" <td>Abbott</td>\n " ,
" <td>TX</td>\n " ,
" <td>15</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>5</th>\n " ,
" <td>Abbott Park</td>\n " ,
" <td>IL</td>\n " ,
" <td>81</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>6</th>\n " ,
" <td>Aberdeen</td>\n " ,
" <td>MD</td>\n " ,
" <td>537</td>\n " ,
" <td>7.400000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>7</th>\n " ,
" <td>Aberdeen</td>\n " ,
" <td>NC</td>\n " ,
" <td>55</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>8</th>\n " ,
" <td>Aberdeen</td>\n " ,
" <td>NJ</td>\n " ,
" <td>18</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>9</th>\n " ,
" <td>Aberdeen</td>\n " ,
" <td>SD</td>\n " ,
" <td>256</td>\n " ,
" <td>2.666667</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>10</th>\n " ,
" <td>Aberdeen</td>\n " ,
" <td>WA</td>\n " ,
" <td>105</td>\n " ,
" <td>5.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>11</th>\n " ,
" <td>Aberdeen Proving Ground</td>\n " ,
" <td>MD</td>\n " ,
" <td>233</td>\n " ,
" <td>3.545455</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>12</th>\n " ,
" <td>Abilene</td>\n " ,
" <td>KS</td>\n " ,
" <td>76</td>\n " ,
" <td>8.666667</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>13</th>\n " ,
" <td>Abilene</td>\n " ,
" <td>TX</td>\n " ,
" <td>1592</td>\n " ,
" <td>3.500000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>14</th>\n " ,
" <td>Abingdon</td>\n " ,
" <td>MD</td>\n " ,
" <td>47</td>\n " ,
" <td>1.500000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>15</th>\n " ,
" <td>Abingdon</td>\n " ,
" <td>VA</td>\n " ,
" <td>131</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>16</th>\n " ,
" <td>Abington</td>\n " ,
" <td>MA</td>\n " ,
" <td>92</td>\n " ,
" <td>16.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>17</th>\n " ,
" <td>Abington</td>\n " ,
" <td>PA</td>\n " ,
" <td>194</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>18</th>\n " ,
" <td>Absecon</td>\n " ,
" <td>NJ</td>\n " ,
" <td>683</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" <tr>\n " ,
" <th>19</th>\n " ,
" <td>Academy</td>\n " ,
" <td>PA</td>\n " ,
" <td>25</td>\n " ,
" <td>1.000000</td>\n " ,
" </tr>\n " ,
" </tbody>\n " ,
" </table>\n " ,
" </div>"
],
"text/plain" : [
" city stateProvince TotalClicks AvgDays\n " ,
" 0 29 Palms CA 60 1.000000\n " ,
" 1 Abbeville LA 111 1.000000\n " ,
" 2 Abbeville SC 9 1.000000\n " ,
" 3 Abbotsford WI 27 1.000000\n " ,
" 4 Abbott TX 15 1.000000\n " ,
" 5 Abbott Park IL 81 1.000000\n " ,
" 6 Aberdeen MD 537 7.400000\n " ,
" 7 Aberdeen NC 55 1.000000\n " ,
" 8 Aberdeen NJ 18 1.000000\n " ,
" 9 Aberdeen SD 256 2.666667\n " ,
" 10 Aberdeen WA 105 5.000000\n " ,
" 11 Aberdeen Proving Ground MD 233 3.545455\n " ,
" 12 Abilene KS 76 8.666667\n " ,
" 13 Abilene TX 1592 3.500000\n " ,
" 14 Abingdon MD 47 1.500000\n " ,
" 15 Abingdon VA 131 1.000000\n " ,
" 16 Abington MA 92 16.000000\n " ,
" 17 Abington PA 194 1.000000\n " ,
" 18 Absecon NJ 683 1.000000\n " ,
" 19 Academy PA 25 1.000000"
]
},
"execution_count" : 6 ,
"metadata" : {},
"output_type" : " execute_result"
}
],
"source" : [
"# Days each listing stayed posted and the clicks it drew, rolled up per city.\n",
"# A listing is one (companyId, jobId) pair; df holds one row per listing per day.\n",
"listing_keys = ['companyId', 'jobId']\n",
"\n",
"# Parse the dates once, vectorised, instead of calling strptime on every row.\n",
"by_listing = df.assign(date=pd.to_datetime(df['date'], format='%Y-%m-%d')).groupby(listing_keys)\n",
"listing_stats = pd.DataFrame({\n",
"    'PostDate': by_listing['date'].min(),\n",
"    'EndDate': by_listing['date'].max(),\n",
"    'Clicks': by_listing['clicks'].sum(),\n",
"}).reset_index()\n",
"# Inclusive span: a listing seen on a single day counts as 1 day.\n",
"listing_stats['DaysPosted'] = (listing_stats['EndDate'] - listing_stats['PostDate']).dt.days + 1\n",
"print(\"Finished calculating\")\n",
"\n",
"# Attach each listing's location(s). De-duplicating on the listing keys plus the location\n",
"# (rather than on clicks/dates) keeps two different listings that happen to share the same\n",
"# city, click total and dates, and avoids joining against every click-level row.\n",
"listing_locations = df[listing_keys + ['city', 'stateProvince']].drop_duplicates()\n",
"listing_stats = listing_stats.merge(listing_locations, on=listing_keys)\n",
"print(\"Finished joining\")\n",
"\n",
"city_listings = (\n",
"    listing_stats.groupby(['city', 'stateProvince'])\n",
"    .agg({'Clicks': 'sum', 'DaysPosted': 'mean'})\n",
"    .rename(columns={'Clicks': 'TotalClicks', 'DaysPosted': 'AvgDays'})\n",
"    .reset_index()\n",
")[['city', 'stateProvince', 'TotalClicks', 'AvgDays']]\n",
"print(\"Finished aggregating\")\n",
"city_listings.head(20)"
]
},
{
"cell_type" : " code" ,
"execution_count" : 8 ,
"metadata" : {},
"outputs" : [
{
"data" : {
"text/plain" : [
" (9102, 4)"
]
},
"execution_count" : 8 ,
"metadata" : {},
"output_type" : " execute_result"
}
],
"source" : [
" city_listings.shape"
]
},
{
"cell_type" : " code" ,
"execution_count" : 9 ,
"metadata" : {},
"outputs" : [
{
"name" : " stdout" ,
"output_type" : " stream" ,
"text" : [
" Finished writing to csv\n "
]
}
],
"source" : [
"# index=False: the RangeIndex carries no information and would otherwise come back\n",
"# as a stray 'Unnamed: 0' column when the file is read again.\n",
"city_listings.to_csv('data/listing_time.csv', index=False)\n",
"print(\"Finished writing to csv\")"
]
},
{
"cell_type" : " code" ,
"execution_count" : 90 ,
"metadata" : {},
"outputs" : [
{
"ename" : " KeyError" ,
"evalue" : " ('clicks', 'occurred at index Unnamed: 0')" ,
"output_type" : " error" ,
"traceback" : [
" \u001b [1;31m---------------------------------------------------------------------------\u001b [0m" ,
" \u001b [1;31mTypeError\u001b [0m Traceback (most recent call last)" ,
" \u001b [1;32mpandas\\ _libs\\ index.pyx\u001b [0m in \u001b [0;36mpandas._libs.index.IndexEngine.get_loc\u001b [1;34m()\u001b [0m\n " ,
" \u001b [1;32mpandas\\ _libs\\ hashtable_class_helper.pxi\u001b [0m in \u001b [0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b [1;34m()\u001b [0m\n " ,
" \u001b [1;31mTypeError\u001b [0m: an integer is required" ,
" \n During handling of the above exception, another exception occurred:\n " ,
" \u001b [1;31mKeyError\u001b [0m Traceback (most recent call last)" ,
"\u001b[1;32m<ipython-input-90-58b882dd31b1>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# Regression\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mlm\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mLinearRegression\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mr\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mmath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mlm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'descriptionWordCount'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mlm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'descriptionWordCount'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\envs\\Datafest2018\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, axis, broadcast, raw, reduce, args, **kwds)\u001b[0m\n\u001b[0;32m 4875\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4876\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mreduce\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4877\u001b[1;33m ignore_failures=ignore_failures)\n\u001b[0m\u001b[0;32m 4878\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4879\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_apply_broadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\envs\\Datafest2018\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_apply_standard\u001b[1;34m(self, func, axis, ignore_failures, reduce)\u001b[0m\n\u001b[0;32m 4971\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4972\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4973\u001b[1;33m \u001b[0mresults\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4974\u001b[0m \u001b[0mkeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4975\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m<ipython-input-90-58b882dd31b1>\u001b[0m in \u001b[0;36m<lambda>\u001b[1;34m(r)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# Regression\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mlm\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mLinearRegression\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mr\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mmath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mlm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'descriptionWordCount'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mlm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'descriptionWordCount'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\envs\\Datafest2018\\lib\\site-packages\\pandas\\core\\series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 621\u001b[0m \u001b[0mkey\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_apply_if_callable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 622\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 623\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 624\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 625\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\envs\\Datafest2018\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_value\u001b[1;34m(self, series, key)\u001b[0m\n\u001b[0;32m 2558\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2559\u001b[0m return self._engine.get_value(s, k,\n\u001b[1;32m-> 2560\u001b[1;33m tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[0;32m 2561\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2562\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m0\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minferred_type\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'integer'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'boolean'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
" \u001b [1;32mpandas\\ _libs\\ index.pyx\u001b [0m in \u001b [0;36mpandas._libs.index.IndexEngine.get_value\u001b [1;34m()\u001b [0m\n " ,
" \u001b [1;32mpandas\\ _libs\\ index.pyx\u001b [0m in \u001b [0;36mpandas._libs.index.IndexEngine.get_value\u001b [1;34m()\u001b [0m\n " ,
" \u001b [1;32mpandas\\ _libs\\ index.pyx\u001b [0m in \u001b [0;36mpandas._libs.index.IndexEngine.get_loc\u001b [1;34m()\u001b [0m\n " ,
" \u001b [1;31mKeyError\u001b [0m: ('clicks', 'occurred at index Unnamed: 0')"
]
}
],
"source" : [
" # Regression\n " ,
" lm = LinearRegression(n_jobs=-1)\n " ,
" independent = df[['']]"
]
}
],