In [21]:
import mysql.connector
import pandas as pd
import getpass
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import numpy as np
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

password = getpass.getpass()

In [22]:
cnx = mysql.connector.connect(user = 'root', password = password,
                              host = '127.0.0.1', database = 'sakila')

cnx.is_connected()
cursor = cnx.cursor()

1

In [23]:
query = '''
SELECT 
  f.film_id, 
  f.title, 
  f.description, 
  f.release_year, 
  f.length, 
  f.rating, 
  f.rental_rate, 
  f.replacement_cost, 
  COUNT(r.rental_id) AS rental_count
FROM 
  film f 
  JOIN inventory i ON f.film_id = i.film_id 
  JOIN rental r ON i.inventory_id = r.inventory_id 
GROUP BY 
  f.film_id 
ORDER BY 
  f.film_id;

'''


In [24]:
cursor.execute(query)

2

In [25]:
data = pd.DataFrame(cursor.fetchall())
data.columns = [head[0] for head in cursor.description]

In [26]:
data.head()

Unnamed: 0,film_id,title,description,release_year,length,rating,rental_rate,replacement_cost,rental_count
0,1,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,86,PG,0.99,20.99,23
1,2,ACE GOLDFINGER,A Astounding Epistle of a Database Administrat...,2006,48,G,4.99,12.99,7
2,3,ADAPTATION HOLES,A Astounding Reflection of a Lumberjack And a ...,2006,50,NC-17,2.99,18.99,12
3,4,AFFAIR PREJUDICE,A Fanciful Documentary of a Frisbee And a Lumb...,2006,117,G,2.99,26.99,23
4,5,AFRICAN EGG,A Fast-Paced Documentary of a Pastry Chef And ...,2006,130,G,2.99,22.99,12


3

In [27]:
data.dtypes

film_id              int64
title               object
description         object
release_year         int64
length               int64
rating              object
rental_rate         object
replacement_cost    object
rental_count         int64
dtype: object

In [28]:
data.isna().sum()

film_id             0
title               0
description         0
release_year        0
length              0
rating              0
rental_rate         0
replacement_cost    0
rental_count        0
dtype: int64

In [29]:
cat_to_num=["rental_rate", "replacement_cost"]
for cat in cat_to_num:
    data[cat]=data[cat].astype("int64")
data.dtypes

film_id              int64
title               object
description         object
release_year         int64
length               int64
rating              object
rental_rate          int64
replacement_cost     int64
rental_count         int64
dtype: object

In [30]:
num_to_cat=["film_id","release_year"]
for num in num_to_cat:
    data[num]=data[num].astype("object")
data.dtypes

film_id             object
title               object
description         object
release_year        object
length               int64
rating              object
rental_rate          int64
replacement_cost     int64
rental_count         int64
dtype: object

4

In [31]:
query = '''
SELECT f.film_id, IF(MAX(r.rental_date) >= '2006-02-01', TRUE, FALSE) AS rented_last_month
FROM film f
JOIN inventory i ON f.film_id = i.film_id
JOIN rental r ON i.inventory_id = r.inventory_id
GROUP BY f.film_id
ORDER BY f.film_id;
'''

In [32]:
cursor.execute(query)

In [33]:
data1 = pd.DataFrame(cursor.fetchall())
data1.columns = [head[0] for head in cursor.description]

In [34]:
data1['rented_last_month'].value_counts()

0    790
1    168
Name: rented_last_month, dtype: int64

In [35]:
data['rented_last_month'] = data1['rented_last_month']

In [36]:
data.head()

Unnamed: 0,film_id,title,description,release_year,length,rating,rental_rate,replacement_cost,rental_count,rented_last_month
0,1,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,86,PG,0,20,23,0
1,2,ACE GOLDFINGER,A Astounding Epistle of a Database Administrat...,2006,48,G,4,12,7,1
2,3,ADAPTATION HOLES,A Astounding Reflection of a Lumberjack And a ...,2006,50,NC-17,2,18,12,0
3,4,AFFAIR PREJUDICE,A Fanciful Documentary of a Frisbee And a Lumb...,2006,117,G,2,26,23,1
4,5,AFRICAN EGG,A Fast-Paced Documentary of a Pastry Chef And ...,2006,130,G,2,22,12,1


In [37]:
data["rented_last_month"] = data["rented_last_month"].replace({"True": True, "False": False}).astype(bool)

5

In [38]:
Y = data["rented_last_month"]
data_x = data.drop(["rented_last_month"], axis=1)

numeric = data_x.select_dtypes(include=[np.number])
categoric = data_x.select_dtypes(include=['object'])

In [39]:
transformer = Normalizer().fit(numeric)
numeric_normalized=transformer.transform(numeric)
numeric_x=pd.DataFrame(numeric_normalized)
numeric_x.columns=numeric.columns
numeric_x.head()

Unnamed: 0,length,rental_rate,replacement_cost,rental_count
0,0.942554,0.0,0.219199,0.252078
1,0.957514,0.079793,0.239378,0.139637
2,0.917161,0.036686,0.330178,0.220119
3,0.958566,0.016386,0.213015,0.188436
4,0.98181,0.015105,0.166153,0.090629


In [40]:
categoric = categoric.drop('film_id', axis = 1)
categoric_x=pd.get_dummies(categoric,columns=categoric.columns)
categoric_x

  categoric_x=pd.get_dummies(categoric,columns=categoric.columns)


Unnamed: 0,title_ACADEMY DINOSAUR,title_ACE GOLDFINGER,title_ADAPTATION HOLES,title_AFFAIR PREJUDICE,title_AFRICAN EGG,title_AGENT TRUMAN,title_AIRPLANE SIERRA,title_AIRPORT POLLOCK,title_ALABAMA DEVIL,title_ALADDIN CALENDAR,...,description_A Unbelieveable Yarn of a Boat And a Database Administrator who must Meet a Boy in The First Manned Space Station,description_A Unbelieveable Yarn of a Database Administrator And a Woman who must Succumb a A Shark in A U-Boat,description_A Unbelieveable Yarn of a Mad Scientist And a Cat who must Chase a Lumberjack in Australia,description_A Unbelieveable Yarn of a Student And a Database Administrator who must Outgun a Husband in An Abandoned Mine Shaft,release_year_2006,rating_G,rating_NC-17,rating_PG,rating_PG-13,rating_R
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
953,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,1,1,0,0,0,0
954,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,0
955,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,0
956,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1


In [41]:
X = np.concatenate((numeric_x,categoric_x),axis=1)

In [42]:
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.3,random_state=100)

classification=LogisticRegression().fit(X_train,y_train)

predictions=classification.predict(X_test)
predictions

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [43]:
classification.score(X_test,y_test)

0.8229166666666666

In [44]:
y_test.value_counts()

False    237
True      51
Name: rented_last_month, dtype: int64

In [45]:
pd.Series(predictions).value_counts()

False    288
dtype: int64

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.4, random_state=100)

model = LogisticRegression(class_weight='balanced')
model.fit(X_train, y_train)

predictions = model.predict(X_test)

In [47]:
y_test.value_counts()

False    316
True      68
Name: rented_last_month, dtype: int64

In [48]:
pd.Series(predictions).value_counts()

False    384
dtype: int64

In [49]:
model.score(X_test,y_test)

0.8229166666666666

6

In [50]:
data["rented_next_month"]=model.predict(X)
data

Unnamed: 0,film_id,title,description,release_year,length,rating,rental_rate,replacement_cost,rental_count,rented_last_month,rented_next_month
0,1,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,86,PG,0,20,23,False,False
1,2,ACE GOLDFINGER,A Astounding Epistle of a Database Administrat...,2006,48,G,4,12,7,True,True
2,3,ADAPTATION HOLES,A Astounding Reflection of a Lumberjack And a ...,2006,50,NC-17,2,18,12,False,False
3,4,AFFAIR PREJUDICE,A Fanciful Documentary of a Frisbee And a Lumb...,2006,117,G,2,26,23,True,False
4,5,AFRICAN EGG,A Fast-Paced Documentary of a Pastry Chef And ...,2006,130,G,2,22,12,True,True
...,...,...,...,...,...,...,...,...,...,...,...
953,996,YOUNG LANGUAGE,A Unbelieveable Yarn of a Boat And a Database ...,2006,183,G,0,9,7,False,False
954,997,YOUTH KICK,A Touching Drama of a Teacher And a Cat who mu...,2006,179,NC-17,0,14,6,False,False
955,998,ZHIVAGO CORE,A Fateful Yarn of a Composer And a Man who mus...,2006,105,NC-17,0,10,9,True,True
956,999,ZOOLANDER FICTION,A Fateful Reflection of a Waitress And a Boat ...,2006,101,R,2,28,17,False,False


In [51]:
data_rented_next_month = data[data['rented_next_month'] == True]
data_rented_next_month = data_rented_next_month[["title", "rented_last_month", "rented_next_month"]]
data_rented_next_month

Unnamed: 0,title,rented_last_month,rented_next_month
1,ACE GOLDFINGER,True,True
4,AFRICAN EGG,True,True
15,ALONE TRIP,True,True
17,AMADEUS HOLY,True,True
20,AMISTAD MIDSUMMER,True,True
...,...,...,...
923,WEDDING APOLLO,True,True
931,WILD APOLLO,True,True
939,WOMEN DORADO,True,True
947,WORLD LEATHERNECKS,True,True
