# Capstone - Toronto Shelter Occupancy Prediction

## Import Modules

In [170]:
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# Ignore warnings
# NOTE(review): this blanket filter silences every warning category for the rest of
# the session, including any warnings libraries raise while fitting models.
warnings.simplefilter("ignore")

In [154]:
# Loading data csv
# Each file is read and its 'Unnamed: 0' column is removed straight away
# (presumably an index column written out when the CSV was saved — confirm).
df = pd.read_csv('data/shelter_occupancy_cleaned.csv').drop(columns=['Unnamed: 0'])
coor_df = pd.read_csv('data/shelter_coordinates.csv').drop(columns=['Unnamed: 0'])
weather_df = pd.read_csv('data/toronto_weather.csv').drop(columns=['Unnamed: 0'])

---

In [155]:
# Merging data frames
# Two left joins, so every row of df is kept: coor_df is matched on
# SHELTER_POSTAL_CODE first, then weather_df on OCCUPANCY_DATE.
df = (
    df
    .merge(coor_df, how='left', on='SHELTER_POSTAL_CODE')
    .merge(weather_df, how='left', on='OCCUPANCY_DATE')
)

In [156]:
# Converting categorical data to dummy variables
# The listed columns are replaced by one indicator column per category,
# named '<column>_<category>'.
categorical_columns = ['SECTOR', 'sublocality']
df = pd.get_dummies(df, columns=categorical_columns)

In [157]:
# Dropping columns before fitting
# These columns are removed so they are not part of the frame used for modelling.
columns_to_drop = [
    'FACILITY_NAME',
    'OCCUPANCY_DATE',
    'ORGANIZATION_NAME',
    'PROGRAM_NAME',
    'SHELTER_ADDRESS',
    'SHELTER_CITY',
    'SHELTER_NAME',
    'SHELTER_POSTAL_CODE',
    'SHELTER_PROVINCE',
    'lat',
    'lng',
    'OCCUPANCY',
    'Week_Day',
]
df = df.drop(columns=columns_to_drop)

In [158]:
# Defining occupancy level
# 1 if the occupancy rate is at or above 100% (OCCUPANCY_RATE >= 1), otherwise 0.
# The comparison is vectorised instead of applying a lambda row by row; a missing
# rate compares as False and therefore maps to 0, exactly as before.
df['OCCUPANCY_LEVEL'] = (df['OCCUPANCY_RATE'] >= 1).astype('int64')

# The raw rate is removed once the binary target has been derived from it.
df = df.drop(columns=['OCCUPANCY_RATE'])

In [159]:
# It seems that the historical weather data contains null values
# Build a per-column flag (True when the column holds at least one missing value)
# and display the names of the flagged columns.
columns_with_nulls = df.isna().any(axis=0)
df.columns[columns_with_nulls]

Index(['Mean Temp (°C)', 'Total Precip (mm)'], dtype='object')

In [160]:
# Dropping rows with null weather data
# dropna() removes every row that has a missing value in any column. The result is
# rebound to df rather than mutating in place, matching how the other cells update df.
df = df.dropna()

In [162]:
# Preparing data for model fitting
# Features: every column from 'CAPACITY' through 'sublocality_York'. A label slice
# with .loc includes both end points and depends on the current column order.
X = df.loc[:, 'CAPACITY':'sublocality_York']
# Target: the binary occupancy level.
y = df['OCCUPANCY_LEVEL']

# NOTE(review): the scaler is created here but is not applied to X in this cell.
scaler = StandardScaler()

# random_state pins the shuffle so the split (and every metric computed from it)
# is the same on each run.
# NOTE(review): test_size=0.7 holds out 70% of the rows for testing and trains on
# the remaining 30% — confirm this ratio is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.7, random_state=42
)

In [171]:
# Trying to fit a Logistic Regression model
# A default-configured estimator is fitted on the training split; fit() returns the
# fitted estimator, which is what `logit` holds.
logit = LogisticRegression().fit(
    X=X_train,
    y=y_train,
)