### How can I replace the 0 values in df_2014 with 1 or the ```df_holiday['Holday']``` value for the index matching the holiday's date?

In [64]:
import numpy as np
import pandas as pd 
import seaborn
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn import preprocessing
import matplotlib.pyplot as plt 
import matplotlib.dates as dates
import datetime
from scipy import stats
%matplotlib inline

**Grab data from master NYC file, sort and group to show complaint amount by type for each day**

In [65]:
cols = ['Created_Date', 'Complaint_Type']
df = pd.read_csv('NYC.csv', parse_dates=['Created_Date'], usecols=cols, index_col='Created_Date')
df = pd.get_dummies(df.Complaint_Type)
df = df.reset_index()
df = df.groupby(df['Created_Date']).sum()
df.reset_index().head()
df = df.sort_index()

**Read in weather data for the years being analyzed and index by date**

In [66]:
keepers = ['EST', 'Max TemperatureF', 'Events']
df_weather_2014 = pd.read_csv('clean_2014_weather.csv', usecols=keepers, parse_dates='EST')
df_weather_2015 = pd.read_csv('clean_2015_weather.csv', usecols=keepers, parse_dates='EST')
df_weather_2014 = df_weather_2014.set_index('EST')
df_weather_2015 = df_weather_2015.set_index('EST')

**Join the weather event to the year's dataframe, initialize 'Holiday' column with 0 vals**

In [67]:
df_2014_final = df[:365].join(df_weather_2014)
df_2015_final = df[365:].join(df_weather_2015)
#df_2014['Holiday'] = 0
#df_2015['Holiday'] = 0

**Rename columns to better fit my screen**

In [68]:
df_2014_final.columns = ['Building Conditions', 'General Request', 'Living Conditions', 'Parking Enforcement', 'Signal/Sign Repair', 'Street/Sidewalk Repair', 'Max TempF', 'Weather']
df_2015_final.columns = ['Building Conditions', 'General Request', 'Living Conditions', 'Parking Enforcement', 'Signal/Sign Repair', 'Street/Sidewalk Repair', 'Max TempF', 'Weather']

**Read in holiday csv, contains holidays and dates for each year**

In [69]:
#df_holidays = pd.read_csv('../../../holidays.csv', parse_dates=['2014_date', '2015_date'])

In [70]:
#df_holidays2 = df_holidays.set_index('2014_date')
#df_holidays3 = df_holidays.set_index('2015_date')

In [71]:
#df_2014.join(df_holidays2, lsuffix='_')
#df_2015.join(df_holidays3, lsuffix='_')

In [72]:
#df_2014_final = df_2014.join(df_holidays2, lsuffix='_')
#df_2015_final = df_2015.join(df_holidays3, lsuffix='_')

In [73]:
#df_2014_final = df_2014_final.drop('Holiday_', axis=1).fillna(0)
#df_2014_final = df_2014_final.drop('2015_date', axis=1).fillna(0)
#df_2015_final = df_2015_final.drop('Holiday_', axis=1).fillna(0)
#df_2015_final = df_2015_final.drop('2014_date', axis=1).fillna(0)

In [74]:
#df_2014_final['Holiday'] = (df_2014_final['Holiday'] != 0).astype(int)
#df_2015_final['Holiday'] = (df_2015_final['Holiday'] != 0).astype(int)

In [75]:
encode = preprocessing.LabelEncoder()

In [76]:
encode.fit(df_2014_final['Weather'])

LabelEncoder()

In [77]:
df_2014_final['Weather'] = encode.transform(df_2014_final['Weather'])

In [78]:
encode.fit(df_2015_final['Weather'])

LabelEncoder()

In [79]:
df_2015_final['Weather'] = encode.transform(df_2015_final['Weather'])

In [80]:
df_weather_2016 = pd.read_csv('nyc_2016_weather.csv', parse_dates='Created_Date')

In [81]:
df_weather_2016.head()

Unnamed: 0,Created_Date,Max TempF
0,Fri\n1/1/2016,42°
1,Sat\n1/2/2016,40°
2,Sun\n1/3/2016,45°
3,Mon\n1/4/2016,36°
4,Tue\n1/5/2016,29°


In [82]:
df_weather_2016['Created_Date'] = pd.to_datetime(df_weather_2016['Created_Date'])

In [179]:
#df_weather_2016 = df_weather_2016.set_index('Created_Date')

In [180]:
year_array = np.arange('2016-01', '2017-01', dtype='datetime64[D]')

In [87]:
df_weather_2016['Buiding_Conditions'] = "-"
df_weather_2016['General_Request'] = "-"
df_weather_2016['Living_Conditions'] = "-"
df_weather_2016['Signal_Sign_Repair'] = "-"
df_weather_2016['Street_Sidewalk_Repair'] = "-"
df_weather_2016['Max_TempF_'] = df_weather_2016['Max TempF'].astype
df_weather_2016['Weather'] = "-"

KeyError: 'Max TempF'

In [84]:
df_weather_2016.drop('Max TempF', axis=1, inplace=True)

In [85]:
df_2016 = df_weather_2016.rename(columns={'Max_TempF_': 'Max_TempF'})

In [88]:
df_2016.head()
#df_2015.drop('Holiday', axis=1, inplace=True)

Unnamed: 0,Created_Date,Buiding_Conditions,General_Request,Living_Conditions,Signal_Sign_Repair,Street_Sidewalk_Repair,Max_TempF,Weather
0,2016-01-01,-,-,-,-,-,42°,-
1,2016-01-02,-,-,-,-,-,40°,-
2,2016-01-03,-,-,-,-,-,45°,-
3,2016-01-04,-,-,-,-,-,36°,-
4,2016-01-05,-,-,-,-,-,29°,-


In [91]:
df_2016.to_csv('2016_df.csv', index = False)
df_2015.to_csv('2015_df.csv')
df_2014.to_csv('2014_df.csv')

In [90]:
df_2015.head()

Unnamed: 0_level_0,Building_Conditions,General_Request,Living_Conditions,Parking_Enforcement,Signal_Sign_Repair,Street_Sidewalk_Repair,Max_TempF,Weather
Created_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-01-01,1607,369,273,438,112,264,40,Sunny
2015-01-02,1935,604,581,588,282,499,45,Sunny
2015-01-03,1527,465,369,511,156,322,47,Rain
2015-01-04,1230,395,403,486,165,357,55,Fog-Rain
2015-01-05,2797,748,705,625,537,734,51,Sunny


In [60]:
df_2014 = df[:365].join(df_weather_2014)
df_2015 = df[365:].join(df_weather_2015)
#df_2016 = df_2016.join(df_weather_2016)

In [61]:
df_2014.columns = ['Building_Conditions', 'General_Request', 'Living_Conditions', 'Parking_Enforcement', 'Signal_Sign_Repair', 'Street_Sidewalk_Repair', 'Max_TempF', 'Weather']
df_2015.columns = ['Building_Conditions', 'General_Request', 'Living_Conditions', 'Parking_Enforcement', 'Signal_Sign_Repair', 'Street_Sidewalk_Repair', 'Max_TempF', 'Weather']
df_2016.columns = ['Building_Conditions', 'General_Request', 'Living_Conditions', 'Parking_Enforcement', 'Signal_Sign_Repair', 'Street_Sidewalk_Repair', 'Max_TempF', 'Weather']

In [62]:
df_2014.head()

Unnamed: 0_level_0,Building_Conditions,General_Request,Living_Conditions,Parking_Enforcement,Signal_Sign_Repair,Street_Sidewalk_Repair,Max_TempF,Weather
Created_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014-01-01,1512,388,155,336,139,303,34,Sunny
2014-01-02,3236,921,263,360,315,320,33,Snow
2014-01-03,3712,624,235,306,240,516,19,Fog-Snow
2014-01-04,4347,489,154,375,130,1188,31,Sunny
2014-01-05,2529,500,169,361,206,1022,49,Fog-Rain


In [None]:
encode = preprocessing.LabelEncoder()

In [None]:
encode.fit(df_2014_final['Max TempF'])

In [None]:
df_2014_final['Max TempF'] = encode.transform(df_2014_final['Max TempF'])

In [None]:
encode.fit(df_2015_final['Max TempF'])

In [None]:
df_2015_final['Max TempF'] = encode.transform(df_2015_final['Max TempF'])

In [None]:
#df_2016['Max_TempF'] = df_2016['Max_TempF'].str.replace('°', '')
#df_2016.Max_TempF = pd.to_numeric(df_2016.Max_TempF)

In [372]:
X_2014 = df_2014_final.reset_index(drop=True)
y_2014 = df_2014_final['Weather']
X_2015 = df_2015_final.reset_index(drop=True)
y_2015 = df_2015_final['Weather']

In [373]:
X_train = X_2014
X_test = X_2015

In [374]:
y_train = y_2014.values
y_test = y_2015.values

In [375]:
lr = LinearRegression()

In [376]:
lr.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)