# Importing all Libraries

In [None]:
#pip install pymysql

In [2]:
import pymysql
from sqlalchemy import create_engine
import pandas as pd
import getpass  # To get the password without showing the input
password = getpass.getpass()

········


# SQL queries for data

In [3]:
connection_string = 'mysql+pymysql://root:'+password+'@localhost/sakila'
engine = create_engine(connection_string)
query = '''SELECT f.title AS 'film_title', COUNT(r.rental_date)
FROM sakila.film f
JOIN sakila.inventory i USING(film_id)
JOIN sakila.rental r USING(inventory_id)
JOIN sakila.film_category fc USING(film_id)
JOIN sakila.category c USING(category_id)
WHERE r.rental_date LIKE '%%2005-08%%'
GROUP BY f.title
ORDER BY f.title;'''

target = pd.read_sql_query(query, engine)
target.head()

Unnamed: 0,film_title,COUNT(r.rental_date)
0,ACADEMY DINOSAUR,9
1,ACE GOLDFINGER,4
2,ADAPTATION HOLES,6
3,AFFAIR PREJUDICE,6
4,AFRICAN EGG,3


# Adding column 'rented_aug'

In [4]:
target['rented_aug'] = 1


In [5]:
target.head()

Unnamed: 0,film_title,COUNT(r.rental_date),rented_aug
0,ACADEMY DINOSAUR,9,1
1,ACE GOLDFINGER,4,1
2,ADAPTATION HOLES,6,1
3,AFFAIR PREJUDICE,6,1
4,AFRICAN EGG,3,1


In [6]:
connection_string = 'mysql+pymysql://root:'+password+'@localhost/sakila'
engine = create_engine(connection_string)
query ='''SELECT f.title AS 'film_title', f.rental_rate, f.rental_duration, f.length, f.rating, f.special_features, c.name AS 'category'
FROM sakila.film f
JOIN sakila.film_category fc USING(film_id)
JOIN sakila.category c USING(category_id)
ORDER BY title;'''

features = pd.read_sql_query(query, engine)
features

Unnamed: 0,film_title,rental_rate,rental_duration,length,rating,special_features,category
0,ACADEMY DINOSAUR,0.99,6,86,PG,"Deleted Scenes,Behind the Scenes",Documentary
1,ACE GOLDFINGER,4.99,3,48,G,"Trailers,Deleted Scenes",Horror
2,ADAPTATION HOLES,2.99,7,50,NC-17,"Trailers,Deleted Scenes",Documentary
3,AFFAIR PREJUDICE,2.99,5,117,G,"Commentaries,Behind the Scenes",Horror
4,AFRICAN EGG,2.99,6,130,G,Deleted Scenes,Family
...,...,...,...,...,...,...,...
995,YOUNG LANGUAGE,0.99,6,183,G,"Trailers,Behind the Scenes",Documentary
996,YOUTH KICK,0.99,4,179,NC-17,"Trailers,Behind the Scenes",Music
997,ZHIVAGO CORE,0.99,6,105,NC-17,Deleted Scenes,Horror
998,ZOOLANDER FICTION,2.99,5,101,R,"Trailers,Deleted Scenes",Children


# Scaling numerical columns

In [7]:
from sklearn.preprocessing import MinMaxScaler

In [8]:
scaler = MinMaxScaler()
#.fit(data= features, columns=['rental_rate','rental_duration','length'])

In [9]:
scaler.fit(features[['rental_rate','rental_duration','length']])

MinMaxScaler()

In [10]:
features_trans = scaler.transform(features[['rental_rate','rental_duration','length']])

In [11]:
features_trans
trans_feature = pd.DataFrame(features_trans)

In [12]:
trans_feature

Unnamed: 0,0,1,2
0,0.0,0.75,0.287770
1,1.0,0.00,0.014388
2,0.5,1.00,0.028777
3,0.5,0.50,0.510791
4,0.5,0.75,0.604317
...,...,...,...
995,0.0,0.75,0.985612
996,0.0,0.25,0.956835
997,0.0,0.75,0.424460
998,0.5,0.50,0.395683


In [13]:
scaled_features = pd.concat([features, trans_feature], axis = 1)
scaled_features

Unnamed: 0,film_title,rental_rate,rental_duration,length,rating,special_features,category,0,1,2
0,ACADEMY DINOSAUR,0.99,6,86,PG,"Deleted Scenes,Behind the Scenes",Documentary,0.0,0.75,0.287770
1,ACE GOLDFINGER,4.99,3,48,G,"Trailers,Deleted Scenes",Horror,1.0,0.00,0.014388
2,ADAPTATION HOLES,2.99,7,50,NC-17,"Trailers,Deleted Scenes",Documentary,0.5,1.00,0.028777
3,AFFAIR PREJUDICE,2.99,5,117,G,"Commentaries,Behind the Scenes",Horror,0.5,0.50,0.510791
4,AFRICAN EGG,2.99,6,130,G,Deleted Scenes,Family,0.5,0.75,0.604317
...,...,...,...,...,...,...,...,...,...,...
995,YOUNG LANGUAGE,0.99,6,183,G,"Trailers,Behind the Scenes",Documentary,0.0,0.75,0.985612
996,YOUTH KICK,0.99,4,179,NC-17,"Trailers,Behind the Scenes",Music,0.0,0.25,0.956835
997,ZHIVAGO CORE,0.99,6,105,NC-17,Deleted Scenes,Horror,0.0,0.75,0.424460
998,ZOOLANDER FICTION,2.99,5,101,R,"Trailers,Deleted Scenes",Children,0.5,0.50,0.395683


In [14]:
# Dropping 'rental_rate', 'rental_duration', and 'length' now that they are scaled
scale_features = scaled_features.drop(['rental_rate', 'rental_duration','length'], axis = 1)
scale_features

Unnamed: 0,film_title,rating,special_features,category,0,1,2
0,ACADEMY DINOSAUR,PG,"Deleted Scenes,Behind the Scenes",Documentary,0.0,0.75,0.287770
1,ACE GOLDFINGER,G,"Trailers,Deleted Scenes",Horror,1.0,0.00,0.014388
2,ADAPTATION HOLES,NC-17,"Trailers,Deleted Scenes",Documentary,0.5,1.00,0.028777
3,AFFAIR PREJUDICE,G,"Commentaries,Behind the Scenes",Horror,0.5,0.50,0.510791
4,AFRICAN EGG,G,Deleted Scenes,Family,0.5,0.75,0.604317
...,...,...,...,...,...,...,...
995,YOUNG LANGUAGE,G,"Trailers,Behind the Scenes",Documentary,0.0,0.75,0.985612
996,YOUTH KICK,NC-17,"Trailers,Behind the Scenes",Music,0.0,0.25,0.956835
997,ZHIVAGO CORE,NC-17,Deleted Scenes,Horror,0.0,0.75,0.424460
998,ZOOLANDER FICTION,R,"Trailers,Deleted Scenes",Children,0.5,0.50,0.395683


In [17]:
scale_features = scale_features.rename({0:'rental_rate',1:'rental_duration',2:'length'}, axis = 1)

In [18]:
scale_features

Unnamed: 0,film_title,rating,special_features,category,rental_rate,rental_duration,length
0,ACADEMY DINOSAUR,PG,"Deleted Scenes,Behind the Scenes",Documentary,0.0,0.75,0.287770
1,ACE GOLDFINGER,G,"Trailers,Deleted Scenes",Horror,1.0,0.00,0.014388
2,ADAPTATION HOLES,NC-17,"Trailers,Deleted Scenes",Documentary,0.5,1.00,0.028777
3,AFFAIR PREJUDICE,G,"Commentaries,Behind the Scenes",Horror,0.5,0.50,0.510791
4,AFRICAN EGG,G,Deleted Scenes,Family,0.5,0.75,0.604317
...,...,...,...,...,...,...,...
995,YOUNG LANGUAGE,G,"Trailers,Behind the Scenes",Documentary,0.0,0.75,0.985612
996,YOUTH KICK,NC-17,"Trailers,Behind the Scenes",Music,0.0,0.25,0.956835
997,ZHIVAGO CORE,NC-17,Deleted Scenes,Horror,0.0,0.75,0.424460
998,ZOOLANDER FICTION,R,"Trailers,Deleted Scenes",Children,0.5,0.50,0.395683


In [19]:
target.shape

(958, 3)

In [20]:
scale_features.shape

(1000, 7)

# Finding titles NOT rented in August 2005

In [21]:
all_titles = scale_features['film_title']
all_titles
aug_titles = target['film_title']
aug_titles

0       ACADEMY DINOSAUR
1         ACE GOLDFINGER
2       ADAPTATION HOLES
3       AFFAIR PREJUDICE
4            AFRICAN EGG
             ...        
953       YOUNG LANGUAGE
954           YOUTH KICK
955         ZHIVAGO CORE
956    ZOOLANDER FICTION
957            ZORRO ARK
Name: film_title, Length: 958, dtype: object

In [22]:
lst_all_titles = []
for title in all_titles:
    lst_all_titles.append(title)
len(lst_all_titles)

1000

In [23]:
lst_aug_titles= []
for title in aug_titles:
    lst_aug_titles.append(title)
len(lst_aug_titles)

958

In [24]:
lst_0_aug = [item for item in lst_all_titles if item not in lst_aug_titles]

len(lst_0_aug)


42

# Adding the NOT rented in August to the target dataframe

In [25]:
df_0_aug = pd.DataFrame(lst_0_aug)

df_0_aug['COUNT(r.rental_date)'] = 0
df_0_aug['rented_aug'] = 0

df_0_aug = df_0_aug.rename(columns = {0:'Title'})
#df_new = df.rename(columns={'A': 'Col_1'}
df_0_aug.head()

Unnamed: 0,Title,COUNT(r.rental_date),rented_aug
0,ALICE FANTASIA,0,0
1,APOLLO TEEN,0,0
2,ARGONAUTS TOWN,0,0
3,ARK RIDGEMONT,0,0
4,ARSENIC INDEPENDENCE,0,0


In [26]:
target = target.rename(columns = {'film_title': 'Title'})
target.head()

Unnamed: 0,Title,COUNT(r.rental_date),rented_aug
0,ACADEMY DINOSAUR,9,1
1,ACE GOLDFINGER,4,1
2,ADAPTATION HOLES,6,1
3,AFFAIR PREJUDICE,6,1
4,AFRICAN EGG,3,1


In [27]:
target_all = pd.concat([target, df_0_aug], ignore_index=True)
target_all

Unnamed: 0,Title,COUNT(r.rental_date),rented_aug
0,ACADEMY DINOSAUR,9,1
1,ACE GOLDFINGER,4,1
2,ADAPTATION HOLES,6,1
3,AFFAIR PREJUDICE,6,1
4,AFRICAN EGG,3,1
...,...,...,...
995,TREASURE COMMAND,0,0
996,VILLAIN DESPERATE,0,0
997,VOLUME HOUSE,0,0
998,WAKE JAWS,0,0


# Sorting alphabetically

In [28]:
target_all = target_all.sort_values('Title').reset_index()
target_all

Unnamed: 0,index,Title,COUNT(r.rental_date),rented_aug
0,0,ACADEMY DINOSAUR,9,1
1,1,ACE GOLDFINGER,4,1
2,2,ADAPTATION HOLES,6,1
3,3,AFFAIR PREJUDICE,6,1
4,4,AFRICAN EGG,3,1
...,...,...,...,...
995,953,YOUNG LANGUAGE,3,1
996,954,YOUTH KICK,3,1
997,955,ZHIVAGO CORE,2,1
998,956,ZOOLANDER FICTION,7,1


In [29]:
target_all['rented_aug'].value_counts()

1    958
0     42
Name: rented_aug, dtype: int64

In [30]:
target_all = target_all.drop('index', axis = 1)
target_all

Unnamed: 0,Title,COUNT(r.rental_date),rented_aug
0,ACADEMY DINOSAUR,9,1
1,ACE GOLDFINGER,4,1
2,ADAPTATION HOLES,6,1
3,AFFAIR PREJUDICE,6,1
4,AFRICAN EGG,3,1
...,...,...,...
995,YOUNG LANGUAGE,3,1
996,YOUTH KICK,3,1
997,ZHIVAGO CORE,2,1
998,ZOOLANDER FICTION,7,1


# Combining both dataframes

In [31]:
full_data = pd.concat([scale_features,target_all],axis=1)
full_data

Unnamed: 0,film_title,rating,special_features,category,rental_rate,rental_duration,length,Title,COUNT(r.rental_date),rented_aug
0,ACADEMY DINOSAUR,PG,"Deleted Scenes,Behind the Scenes",Documentary,0.0,0.75,0.287770,ACADEMY DINOSAUR,9,1
1,ACE GOLDFINGER,G,"Trailers,Deleted Scenes",Horror,1.0,0.00,0.014388,ACE GOLDFINGER,4,1
2,ADAPTATION HOLES,NC-17,"Trailers,Deleted Scenes",Documentary,0.5,1.00,0.028777,ADAPTATION HOLES,6,1
3,AFFAIR PREJUDICE,G,"Commentaries,Behind the Scenes",Horror,0.5,0.50,0.510791,AFFAIR PREJUDICE,6,1
4,AFRICAN EGG,G,Deleted Scenes,Family,0.5,0.75,0.604317,AFRICAN EGG,3,1
...,...,...,...,...,...,...,...,...,...,...
995,YOUNG LANGUAGE,G,"Trailers,Behind the Scenes",Documentary,0.0,0.75,0.985612,YOUNG LANGUAGE,3,1
996,YOUTH KICK,NC-17,"Trailers,Behind the Scenes",Music,0.0,0.25,0.956835,YOUTH KICK,3,1
997,ZHIVAGO CORE,NC-17,Deleted Scenes,Horror,0.0,0.75,0.424460,ZHIVAGO CORE,2,1
998,ZOOLANDER FICTION,R,"Trailers,Deleted Scenes",Children,0.5,0.50,0.395683,ZOOLANDER FICTION,7,1


In [32]:
full_data['rented_aug'].value_counts()

1    958
0     42
Name: rented_aug, dtype: int64

In [33]:
full_data = full_data.drop(('Title'),axis=1)


In [34]:
full_data

Unnamed: 0,film_title,rating,special_features,category,rental_rate,rental_duration,length,COUNT(r.rental_date),rented_aug
0,ACADEMY DINOSAUR,PG,"Deleted Scenes,Behind the Scenes",Documentary,0.0,0.75,0.287770,9,1
1,ACE GOLDFINGER,G,"Trailers,Deleted Scenes",Horror,1.0,0.00,0.014388,4,1
2,ADAPTATION HOLES,NC-17,"Trailers,Deleted Scenes",Documentary,0.5,1.00,0.028777,6,1
3,AFFAIR PREJUDICE,G,"Commentaries,Behind the Scenes",Horror,0.5,0.50,0.510791,6,1
4,AFRICAN EGG,G,Deleted Scenes,Family,0.5,0.75,0.604317,3,1
...,...,...,...,...,...,...,...,...,...
995,YOUNG LANGUAGE,G,"Trailers,Behind the Scenes",Documentary,0.0,0.75,0.985612,3,1
996,YOUTH KICK,NC-17,"Trailers,Behind the Scenes",Music,0.0,0.25,0.956835,3,1
997,ZHIVAGO CORE,NC-17,Deleted Scenes,Horror,0.0,0.75,0.424460,2,1
998,ZOOLANDER FICTION,R,"Trailers,Deleted Scenes",Children,0.5,0.50,0.395683,7,1


# Encoding catagorical columns 'rating' and 'category'

In [35]:
r = pd.get_dummies(full_data['rating'], prefix='rating')
r

Unnamed: 0,rating_G,rating_NC-17,rating_PG,rating_PG-13,rating_R
0,0,0,1,0,0
1,1,0,0,0,0
2,0,1,0,0,0
3,1,0,0,0,0
4,1,0,0,0,0
...,...,...,...,...,...
995,1,0,0,0,0
996,0,1,0,0,0
997,0,1,0,0,0
998,0,0,0,0,1


In [36]:
c = pd.get_dummies(full_data['category'], prefix='genre')
c

Unnamed: 0,genre_Action,genre_Animation,genre_Children,genre_Classics,genre_Comedy,genre_Documentary,genre_Drama,genre_Family,genre_Foreign,genre_Games,genre_Horror,genre_Music,genre_New,genre_Sci-Fi,genre_Sports,genre_Travel
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
4,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
996,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
997,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
998,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


# Deciding on Special Features

In [37]:
full_data['special_features'].isna().sum()

0

#### Will use Special Features if model neeeds improved

## Combining encoded columns to dataframe

In [38]:
encoded = pd.concat([c,r],axis=1)
encoded

Unnamed: 0,genre_Action,genre_Animation,genre_Children,genre_Classics,genre_Comedy,genre_Documentary,genre_Drama,genre_Family,genre_Foreign,genre_Games,...,genre_Music,genre_New,genre_Sci-Fi,genre_Sports,genre_Travel,rating_G,rating_NC-17,rating_PG,rating_PG-13,rating_R
0,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
996,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0
997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
998,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [39]:
full_data_encoded = pd.concat([full_data,encoded],axis=1)
full_data_encoded

Unnamed: 0,film_title,rating,special_features,category,rental_rate,rental_duration,length,COUNT(r.rental_date),rented_aug,genre_Action,...,genre_Music,genre_New,genre_Sci-Fi,genre_Sports,genre_Travel,rating_G,rating_NC-17,rating_PG,rating_PG-13,rating_R
0,ACADEMY DINOSAUR,PG,"Deleted Scenes,Behind the Scenes",Documentary,0.0,0.75,0.287770,9,1,0,...,0,0,0,0,0,0,0,1,0,0
1,ACE GOLDFINGER,G,"Trailers,Deleted Scenes",Horror,1.0,0.00,0.014388,4,1,0,...,0,0,0,0,0,1,0,0,0,0
2,ADAPTATION HOLES,NC-17,"Trailers,Deleted Scenes",Documentary,0.5,1.00,0.028777,6,1,0,...,0,0,0,0,0,0,1,0,0,0
3,AFFAIR PREJUDICE,G,"Commentaries,Behind the Scenes",Horror,0.5,0.50,0.510791,6,1,0,...,0,0,0,0,0,1,0,0,0,0
4,AFRICAN EGG,G,Deleted Scenes,Family,0.5,0.75,0.604317,3,1,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,YOUNG LANGUAGE,G,"Trailers,Behind the Scenes",Documentary,0.0,0.75,0.985612,3,1,0,...,0,0,0,0,0,1,0,0,0,0
996,YOUTH KICK,NC-17,"Trailers,Behind the Scenes",Music,0.0,0.25,0.956835,3,1,0,...,1,0,0,0,0,0,1,0,0,0
997,ZHIVAGO CORE,NC-17,Deleted Scenes,Horror,0.0,0.75,0.424460,2,1,0,...,0,0,0,0,0,0,1,0,0,0
998,ZOOLANDER FICTION,R,"Trailers,Deleted Scenes",Children,0.5,0.50,0.395683,7,1,0,...,0,0,0,0,0,0,0,0,0,1


# Dropping 'rating' and 'category' columns

In [40]:
full_data_encoded = full_data_encoded.drop(['rating','category'],axis = 1)
full_data_encoded

Unnamed: 0,film_title,special_features,rental_rate,rental_duration,length,COUNT(r.rental_date),rented_aug,genre_Action,genre_Animation,genre_Children,...,genre_Music,genre_New,genre_Sci-Fi,genre_Sports,genre_Travel,rating_G,rating_NC-17,rating_PG,rating_PG-13,rating_R
0,ACADEMY DINOSAUR,"Deleted Scenes,Behind the Scenes",0.0,0.75,0.287770,9,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,ACE GOLDFINGER,"Trailers,Deleted Scenes",1.0,0.00,0.014388,4,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,ADAPTATION HOLES,"Trailers,Deleted Scenes",0.5,1.00,0.028777,6,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,AFFAIR PREJUDICE,"Commentaries,Behind the Scenes",0.5,0.50,0.510791,6,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,AFRICAN EGG,Deleted Scenes,0.5,0.75,0.604317,3,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,YOUNG LANGUAGE,"Trailers,Behind the Scenes",0.0,0.75,0.985612,3,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
996,YOUTH KICK,"Trailers,Behind the Scenes",0.0,0.25,0.956835,3,1,0,0,0,...,1,0,0,0,0,0,1,0,0,0
997,ZHIVAGO CORE,Deleted Scenes,0.0,0.75,0.424460,2,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
998,ZOOLANDER FICTION,"Trailers,Deleted Scenes",0.5,0.50,0.395683,7,1,0,0,1,...,0,0,0,0,0,0,0,0,0,1


# Saving 'film_title' and 'special_features' for later use

In [41]:
title_sp_feat = full_data_encoded[['film_title','special_features']]
title_sp_feat

Unnamed: 0,film_title,special_features
0,ACADEMY DINOSAUR,"Deleted Scenes,Behind the Scenes"
1,ACE GOLDFINGER,"Trailers,Deleted Scenes"
2,ADAPTATION HOLES,"Trailers,Deleted Scenes"
3,AFFAIR PREJUDICE,"Commentaries,Behind the Scenes"
4,AFRICAN EGG,Deleted Scenes
...,...,...
995,YOUNG LANGUAGE,"Trailers,Behind the Scenes"
996,YOUTH KICK,"Trailers,Behind the Scenes"
997,ZHIVAGO CORE,Deleted Scenes
998,ZOOLANDER FICTION,"Trailers,Deleted Scenes"


In [42]:
model_data = full_data_encoded.drop(['film_title','special_features'],axis = 1)
model_data

Unnamed: 0,rental_rate,rental_duration,length,COUNT(r.rental_date),rented_aug,genre_Action,genre_Animation,genre_Children,genre_Classics,genre_Comedy,...,genre_Music,genre_New,genre_Sci-Fi,genre_Sports,genre_Travel,rating_G,rating_NC-17,rating_PG,rating_PG-13,rating_R
0,0.0,0.75,0.287770,9,1,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,1.0,0.00,0.014388,4,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0.5,1.00,0.028777,6,1,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,0.5,0.50,0.510791,6,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,0.5,0.75,0.604317,3,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.0,0.75,0.985612,3,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
996,0.0,0.25,0.956835,3,1,0,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0
997,0.0,0.75,0.424460,2,1,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
998,0.5,0.50,0.395683,7,1,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1


# All columns processed for use in Model

# Test, train, split

In [43]:
y = model_data['rented_aug']
X = model_data.drop('rented_aug', axis = 1)

# train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=75)


In [44]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression().fit(X_train, y_train)
print('training set score:{:3f}'.format(logreg.score(X_train,y_train)))
print('test set score:{:3f}'.format(logreg.score(X_test,y_test)))

training set score:1.000000
test set score:1.000000


In [45]:
prediction = logreg.predict(X_test)

In [46]:
prediction

array([1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1])

# Looking at confusion matrix

In [47]:
from sklearn.metrics import confusion_matrix

In [48]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, prediction)

array([[ 13,   0],
       [  0, 237]])