In [189]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score
import seaborn as sns

# Load the Dataset

In [None]:
# Load the Netflix user-base CSV into a DataFrame; the file is decoded as Latin-1.
data = pd.read_csv(
    'Netflix_Userbase.csv',
    encoding='latin1',
)

In [213]:
# Integer-encode the subscription tier and store the codes in a new 'pay' column.
label_encoder = LabelEncoder()
data['pay'] = label_encoder.fit_transform(
    data['Subscription Type']
)

# Indicator-encode the frame, dropping the first level of each encoded column.
data_dummies = pd.get_dummies(data, drop_first=True)

# Binary target 'out': 1 where the tier code is greater than 1, otherwise 0.
# NOTE(review): which tiers end up flagged depends on the integer codes the
# encoder assigned — presumably sorted label order rather than tier rank;
# confirm this threshold matches the intended grouping.
data_dummies['out'] = data['pay'].gt(1).astype(int)

In [233]:
# Integer-encode the last-payment date values and store the codes in a new 'date' column.
label_encoder = LabelEncoder()
data['date'] = label_encoder.fit_transform(
    data['Last Payment Date']
)

# Indicator-encode the frame again so that the new 'date' column is included.
data_dummies1 = pd.get_dummies(data, drop_first=True)

# Binary flag 'day': 1 where the date code is greater than 1, otherwise 0.
# NOTE(review): the codes come from label-encoding the raw column values, so
# this threshold is presumably not chronological — confirm the intent.
data_dummies1['day'] = data['date'].gt(1).astype(int)

In [None]:
# Descriptive statistics of the frame, stored for later inspection.
statistics = data.describe()

# Print the number of missing values found in each column.
print(data.isna().sum())

In [217]:
# Target: the binary 'out' column.
y = data_dummies['out']

# Features: every encoded column except 'Monthly Revenue'.
# NOTE(review): only 'Monthly Revenue' is dropped, so x still contains the
# target column 'out' as well as 'pay', from which 'out' is computed. Any model
# fitted on (x, y) can read the answer directly from its inputs, which is
# consistent with the R-squared of 1.0 printed further down. Excluding these
# columns would also change the feature count that later cells rely on.
x = data_dummies.drop(columns='Monthly Revenue')

In [234]:
# Second feature/target pair: 'Age' is the target, and both 'Age' and the
# derived 'day' flag are excluded from the features.
y1 = data_dummies1['Age']
x1 = data_dummies1.drop(columns=['Age', 'day'])

I selected the date and age variables because it is well-known that movie and TV show recommendations are often tailored to viewers' ages and interests. If viewers are provided with content suitable for their age group and preferences, it can lead to positive feedback. Conversely, if viewers are not presented with suitable content based on their age, it may result in negative feedback and potentially cause them to disengage from Netflix.

# Exploratory Data Analysis (EDA)

In [None]:
# Summarise the numeric columns (count, mean, spread, quartiles) and print the table.
numeric_summary = data.describe()
print(
    numeric_summary
)

In [None]:
# Stacked histogram of subscription types, with bars split by monthly revenue.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(
    data=data,
    x='Subscription Type',
    hue='Monthly Revenue',
    multiple='stack',
    bins=20,
    ax=ax,
)
# Set the title exactly once: the original second title call ('Feedback')
# silently replaced this descriptive title on the rendered figure.
ax.set_title('Distribution of Subscription Type and its Impact on Monthly Revenue')
ax.set_xlabel('Subscription Type')
ax.set_ylabel('Count')
plt.show()

# Linear Regression Model (Predicting Monthly Revenue)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=10,
)

# Fit an ordinary least-squares model on the training portion.
model = LinearRegression()
model.fit(X=x_train, y=y_train)

In [None]:
# Build one synthetic sample by repeating the placeholder values 10, 15, 12
# across every feature column. The width is taken from the training frame
# instead of being hard-coded (the original assumed exactly 3 * 115 = 345
# features), and the training column names are reused so the sample lines up
# with the features the model was fitted on.
n_features = x_train.shape[1]
new_data = pd.DataFrame(
    [np.resize([10, 15, 12], n_features)],
    columns=x_train.columns,
)
prediction = model.predict(new_data)
# NOTE(review): the model is fitted on y, which is the 'out' column, so the
# "Revenue" wording in this message may be misleading — confirm the target.
print(f"Predicted Revenue: {prediction[0]}")

In [220]:
# Score the fitted model on the held-out rows using the coefficient of determination.
y_pred = model.predict(X=x_test)
r_squared = r2_score(y_true=y_test, y_pred=y_pred)
print("R-squared:", r_squared)

R-squared: 1.0


# Logistic Regression Model (Predicting Customer Feedback)

In [235]:
# Drop the target itself ('out') and the columns it is computed from ('pay' and
# the subscription-type indicators) so the classifier cannot read the label
# directly from its inputs.
leaky_columns = [
    column
    for column in x.columns
    if column in ('out', 'pay') or str(column).startswith('Subscription Type_')
]
features = x.drop(columns=leaky_columns)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train1, x_test1, y_train1, y_test1 = train_test_split(
    features, y, test_size=0.2, random_state=10
)

# Learn the scaling statistics from the training rows only, then apply those
# same statistics to the test rows. Re-fitting on the test set (as the original
# fit_transform call did) scales the two sets inconsistently and leaks
# test-set information into preprocessing.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train1)
x_test_scaled = scaler.transform(x_test1)

In [None]:
# Fit a default-configured logistic regression on the standardized training features.
# This rebinds `model`, replacing whatever estimator the name referred to before.
model = LogisticRegression()
model.fit(X=x_train_scaled, y=y_train1)

In [237]:
# Predict class labels for the standardized held-out rows.
y_pred = model.predict(X=x_test_scaled)

In [None]:
# Evaluate the classifier's predictions against the held-out labels.
accuracy = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
class_report = classification_report(y_test1, y_pred)

# Interpolate the score inside the f-string. Passing `{accuracy}` as a separate
# print argument builds a one-element set, so the value was shown wrapped in
# braces (for example "{0.93}").
print(f"Accuracy: {accuracy}")
print("Confusion Matrix: ")
print(conf_matrix)
print("Classification Report: ")
print(class_report)

# Comparative Analysis and Visualization

Linear regression models, with their ability to predict continuous outcomes, could be applied to analyze patterns within Netflix userbase data such as subscription growth over time or the relationship between user demographics (like age and location) and viewing habits. On the other hand, logistic regression models, being well-suited for binary classification tasks, could be utilized to understand factors influencing user retention or to predict whether a user is likely to churn based on their historical interaction data with the platform. By leveraging these regression techniques alongside Netflix userbase data, one could gain insights into the dynamics of user behavior and preferences, ultimately aiding in strategic decision-making for content recommendation, marketing strategies, and platform optimization efforts.

**Linear Regression Insights (Monthly Revenue Prediction):**

A coefficient with a larger absolute value in a linear regression indicates that the corresponding feature has a stronger impact on the predicted revenue.
A positive coefficient indicates that increasing the feature's value increases the predicted revenue.

Logistic regression insights:

In logistic regression, a coefficient indicates how a feature influences the likelihood of positive feedback.
Features with large coefficients have a strong influence on the prediction of positive feedback.

**Logistic Regression Insights (Feedback Prediction):**

In logistic regression, coefficients show how characteristics affect the probability of a positive response. A positive coefficient indicates that a characteristic increases the probability of a positive response.
Attributes with high coefficients have a strong impact on predicting positive feedback. For example, a high coefficient for customer satisfaction suggests that it strongly influences positive feedback.