@@ -6,12 +6,16 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
"import pandas as pd\n",
"import numpy as np\n",
"import math\n",
"from datetime import datetime\n",
"from sklearn.linear_model import LinearRegression"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -29,42 +33,390 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"16042239"
"(100000, 25)"
]
},
"execution_count": 11,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['normTitleCategory'].count()"
"# US Sample data\n",
"df = pd.read_csv('data/sample1.csv')\n",
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 95,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'Series' object has no attribute 'columns'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-95-4d542a71f7a6>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'normTitleCategory'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m~\\Miniconda3\\envs\\Datafest2018\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 3608\u001b[0m if (name in self._internal_names_set or name in self._metadata or\n\u001b[0;32m 3609\u001b[0m name in self._accessors):\n\u001b[1;32m-> 3610\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3611\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3612\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mAttributeError\u001b[0m: 'Series' object has no attribute 'columns'"
]
}
],
"source": [
"df['normTitleCategory'].columns.values"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"22217"
"companyId jobId \n",
"company00003 job0001121 2016-10-06, 2016-10-10\n",
" job0005179 2016-11-06\n",
" job0010571 2016-11-21, 2016-11-28\n",
" job0021975 2017-01-31, 2017-03-08\n",
" job0023964 2017-03-04, 2017-02-03\n",
" job0025943 2017-04-03\n",
" job0026922 2017-03-13, 2017-03-21, 2017-03-31, 2017-03-14\n",
" job0054913 2017-08-31, 2017-06-25, 2017-09-03\n",
" job0056115 2017-08-05\n",
" job0067749 2017-09-09, 2017-09-07\n",
"Name: date, dtype: object"
]
},
"execution_count": 14,
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = df['']"
"listings = df_small.groupby(['companyId', 'jobId'])['date'].apply(', '.join).reset_index()\n",
"listings.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finished calculating\n",
"Finished joining\n",
"Finished aggregating\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>city</th>\n",
" <th>stateProvince</th>\n",
" <th>TotalClicks</th>\n",
" <th>AvgDays</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>29 Palms</td>\n",
" <td>CA</td>\n",
" <td>60</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Abbeville</td>\n",
" <td>LA</td>\n",
" <td>111</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Abbeville</td>\n",
" <td>SC</td>\n",
" <td>9</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Abbotsford</td>\n",
" <td>WI</td>\n",
" <td>27</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Abbott</td>\n",
" <td>TX</td>\n",
" <td>15</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Abbott Park</td>\n",
" <td>IL</td>\n",
" <td>81</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Aberdeen</td>\n",
" <td>MD</td>\n",
" <td>537</td>\n",
" <td>7.400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Aberdeen</td>\n",
" <td>NC</td>\n",
" <td>55</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Aberdeen</td>\n",
" <td>NJ</td>\n",
" <td>18</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Aberdeen</td>\n",
" <td>SD</td>\n",
" <td>256</td>\n",
" <td>2.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Aberdeen</td>\n",
" <td>WA</td>\n",
" <td>105</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Aberdeen Proving Ground</td>\n",
" <td>MD</td>\n",
" <td>233</td>\n",
" <td>3.545455</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Abilene</td>\n",
" <td>KS</td>\n",
" <td>76</td>\n",
" <td>8.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Abilene</td>\n",
" <td>TX</td>\n",
" <td>1592</td>\n",
" <td>3.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Abingdon</td>\n",
" <td>MD</td>\n",
" <td>47</td>\n",
" <td>1.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Abingdon</td>\n",
" <td>VA</td>\n",
" <td>131</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Abington</td>\n",
" <td>MA</td>\n",
" <td>92</td>\n",
" <td>16.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Abington</td>\n",
" <td>PA</td>\n",
" <td>194</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Absecon</td>\n",
" <td>NJ</td>\n",
" <td>683</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Academy</td>\n",
" <td>PA</td>\n",
" <td>25</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" city stateProvince TotalClicks AvgDays\n",
"0 29 Palms CA 60 1.000000\n",
"1 Abbeville LA 111 1.000000\n",
"2 Abbeville SC 9 1.000000\n",
"3 Abbotsford WI 27 1.000000\n",
"4 Abbott TX 15 1.000000\n",
"5 Abbott Park IL 81 1.000000\n",
"6 Aberdeen MD 537 7.400000\n",
"7 Aberdeen NC 55 1.000000\n",
"8 Aberdeen NJ 18 1.000000\n",
"9 Aberdeen SD 256 2.666667\n",
"10 Aberdeen WA 105 5.000000\n",
"11 Aberdeen Proving Ground MD 233 3.545455\n",
"12 Abilene KS 76 8.666667\n",
"13 Abilene TX 1592 3.500000\n",
"14 Abingdon MD 47 1.500000\n",
"15 Abingdon VA 131 1.000000\n",
"16 Abington MA 92 16.000000\n",
"17 Abington PA 194 1.000000\n",
"18 Absecon NJ 683 1.000000\n",
"19 Academy PA 25 1.000000"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Generate Days Posted for each listing\n",
"def date_difference(row):\n",
" end_day = datetime.strptime(row['EndDate'], '%Y-%m-%d')\n",
" start_day = datetime.strptime(row['PostDate'], '%Y-%m-%d')\n",
" return (end_day-start_day).days + 1\n",
"\n",
"listings = df.groupby(['companyId', 'jobId'])\n",
"earliest_date = listings['date'].min().reset_index(name='PostDate')\n",
"latest_date = listings['date'].max().reset_index(name='EndDate')\n",
"click_count = listings['clicks'].sum().reset_index(name='Clicks')\n",
"dates = earliest_date.merge(latest_date).merge(click_count)\n",
"dates['DaysPosted'] = dates.apply(date_difference, axis=1)\n",
"print(\"Finished calculating\")\n",
"dates = dates.merge(df, on=['companyId', 'jobId'])\n",
"print(\"Finished joining\")\n",
"dates = dates[['city', 'stateProvince', 'Clicks', 'PostDate', 'DaysPosted']].drop_duplicates()\n",
"dates = dates.rename(index=str, columns={'Clicks': 'TotalClicks', 'DaysPosted': 'AvgDays'})\n",
"city_listings = dates.groupby(['city', 'stateProvince']).agg({'TotalClicks': 'sum', 'AvgDays': 'mean'}).reset_index()\n",
"print(\"Finished aggregating\")\n",
"city_listings.head(20)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(9102, 4)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"city_listings.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finished writing to csv\n"
]
}
],
"source": [
"city_listings.to_csv('data/listing_time.csv')\n",
"print(\"Finished writing to csv\")"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "('clicks', 'occurred at index Unnamed: 0')",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
"\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[1;34m()\u001b[0m\n",
"\u001b[1;31mTypeError\u001b[0m: an integer is required",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-90-58b882dd31b1>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# Regression\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mlm\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mLinearRegression\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mr\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mmath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mlm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'descriptionWordCount'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mlm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'descriptionWordCount'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\envs\\Datafest2018\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, axis, broadcast, raw, reduce, args, **kwds)\u001b[0m\n\u001b[0;32m 4875\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4876\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mreduce\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4877\u001b[1;33m ignore_failures=ignore_failures)\n\u001b[0m\u001b[0;32m 4878\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4879\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_apply_broadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\envs\\Datafest2018\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_apply_standard\u001b[1;34m(self, func, axis, ignore_failures, reduce)\u001b[0m\n\u001b[0;32m 4971\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4972\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4973\u001b[1;33m \u001b[0mresults\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4974\u001b[0m \u001b[0mkeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4975\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m<ipython-input-90-58b882dd31b1>\u001b[0m in \u001b[0;36m<lambda>\u001b[1;34m(r)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# Regression\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mlm\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mLinearRegression\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mr\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mmath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mlm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'descriptionWordCount'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mlm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'descriptionWordCount'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'log_clicks'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\envs\\Datafest2018\\lib\\site-packages\\pandas\\core\\series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 621\u001b[0m \u001b[0mkey\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_apply_if_callable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 622\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 623\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 624\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 625\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\envs\\Datafest2018\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_value\u001b[1;34m(self, series, key)\u001b[0m\n\u001b[0;32m 2558\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2559\u001b[0m return self._engine.get_value(s, k,\n\u001b[1;32m-> 2560\u001b[1;33m tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[0;32m 2561\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2562\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m0\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minferred_type\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'integer'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'boolean'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[1;34m()\u001b[0m\n",
"\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[1;34m()\u001b[0m\n",
"\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
"\u001b[1;31mKeyError\u001b[0m: ('clicks', 'occurred at index Unnamed: 0')"
]
}
],
"source": [
"# Regression\n",
"lm = LinearRegression(n_jobs=-1)\n",
"independent = df[['']]"
]
}
],