In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp
import sklearn as sk
import statsmodels as sm
import sympy
from statsmodels.regression import linear_model as lm
from statsmodels.discrete import discrete_model as dm
from statsmodels.tools import add_constant
from statsmodels.datasets import get_rdataset

In [None]:
df = get_rdataset('Default', 'ISLR', cache=True).data

In [None]:
df.default = df.default.apply(lambda x: 0 if x=='No' else 1)

In [None]:
df.head()

In [None]:
df.shape

In [None]:
plt.scatter(x = 'balance', y = 'income', data = df, c = 'default', marker = '+')

In [None]:
plt.subplot(121)
sns.boxplot(x = 'default', y = 'balance', data = df)
plt.subplot(122)
sns.boxplot(x='default', y = 'income', data = df)
plt.tight_layout()

In [None]:
X = add_constant(df.balance)
mdl = lm.OLS(df.default, X).fit()
print(mdl.summary())

In [None]:
plt.plot(df.balance, np.polyval(mdl.params.to_list()[::-1], df.balance), '-', df.balance, df.default, '.', markersize = 1)

In [None]:
X = add_constant(df.balance)
log_mdl = dm.Logit(df.default, X).fit()
print(log_mdl.summary())

In [None]:
plt.plot(df.balance, log_mdl.predict(X), '.', df.balance, df.default, '.', markersize = 1)

In [None]:
p_x

In [None]:
sympy.plot(p_x.subs({b0:-5, b1:0.0055}), (x, 0, 2500) )

In [None]:
sympy.simplify(p_x/(1 - p_x))

In [None]:
sympy.log(sympy.simplify(p_x/(1 - p_x)))

In [None]:
df.head()

In [None]:
df.student = df.student.apply(lambda x: 1 if x=='Yes' else 0)

In [None]:
df.groupby('student').mean().default

In [None]:
X = add_constant(df[['student', 'balance', 'income']])
std_model = dm.Logit(df.default, X).fit()
print(std_model.summary())

In [None]:
plt.scatter(df.balance, std_model.predict(X), c=df.student, s = 1)
zeros = df.groupby('student').mean().default[0]
ones = df.groupby('student').mean().default[1]
plt.plot(df.balance, np.full(df.balance.shape, zeros), '--', label = 'non-students')
plt.plot(df.balance, np.full(df.balance.shape, ones), '--', label = 'students')
plt.legend()

In [None]:
std,u,x = sympy.symbols('std,u,x')
normal = 1/(sympy.sqrt(sympy.pi*2)*std) * sympy.exp( (-1/(2*std**2))*(x - u)**2 )

In [None]:
fkx = normal.subs({std: 1, u: 0})

In [None]:
sympy.plot(fkx, (x, -5, 5))

##  -----> Labs <------

#### --------- Logit Regression ---------

In [None]:
df = get_rdataset('Smarket', 'ISLR', cache=True).data

In [None]:
df.head()

In [None]:
df.describe()

In [None]:
df.corr()

In [None]:
df.Direction = df.Direction.apply(lambda x: 1 if x=='Up' else 0)

In [None]:
df.head()

In [None]:
df.plot(y='Volume', marker='.')

In [None]:
X = add_constant(df[['Lag1','Lag2','Lag3','Lag4','Lag5', 'Volume']])
model = dm.Logit(df.Direction, X).fit()

In [None]:
print(model.summary())

In [None]:
pred = model.predict(X)

In [None]:
bool_pred = pred.map(lambda x: round(x))

In [None]:
compare = bool_pred == df.Direction

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
print(confusion_matrix(df.Direction, bool_pred))

In [None]:
print('TN | FP\n--------\nFN | TP')

In [None]:
compare.mean()

In [None]:
train = df[df['Year'] < 2005]
test = df[df['Year'] == 2005]

In [None]:
X = add_constant(train[['Lag1', 'Lag2']])
model = dm.Logit(train.Direction, X).fit()

In [None]:
pred = model.predict(add_constant(test[['Lag1', 'Lag2']]))

In [None]:
results = pred.map(lambda x: round(x))

In [None]:
print(confusion_matrix(test.Direction, results))

In [None]:
compare = results == test.Direction
compare.mean()

In [None]:
model.predict([1, 1.2, 1.1])

In [None]:
model.predict([1, 1.5, -0.8])

#### ---------- Linear Discriminant Analysis (LDA) --------------

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [None]:
model = LinearDiscriminantAnalysis().fit(train[['Lag1','Lag2']], train.Direction)

In [None]:
model.scalings_[1,0]

In [None]:
model

In [None]:
reg_values = model.scalings_[0,0] * train.Lag1 + model.scalings_[1,0] * train.Lag2
plt.plot(reg_values, '*')

In [None]:
pred = model.predict(test[['Lag1','Lag2']])

In [None]:
confusion_matrix(test.Direction, pred)

In [None]:
compares = test.Direction == pred
compares.mean()

#### ----------- Quadratic Discriminant Analysis (QDA) -------------

In [None]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [None]:
model = QuadraticDiscriminantAnalysis().fit(train[['Lag1','Lag2']], train.Direction)

In [None]:
model.priors_

In [None]:
pred = model.predict(test[['Lag1','Lag2']])

In [None]:
print(confusion_matrix(pred, test.Direction))

In [None]:
compares = pred == test.Direction
compares.mean()

#### --------- KNN Prediction -----------

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
df = get_rdataset('Caravan', 'ISLR', cache = True).data

In [None]:
df.head()

In [None]:
df.Purchase.value_counts()

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
scaler = StandardScaler()

In [None]:
scld_df = scaler.fit_transform()

scld_df = pd.DataFrame(data = scld_df, columns=df.drop(columns=['Purchase']).columns)

scld_df['Purchase'] = df.Purchase

In [None]:
df.MAANTHUI.std()

In [None]:
scld_df.MAANTHUI.std()

In [None]:
train = scld_df.iloc[1000:]
test = scld_df.iloc[:1000]

In [None]:
X = train.drop('Purchase', axis=1)
y = train.Purchase
model = KNeighborsClassifier(n_neighbors=5).fit(X,y)

In [None]:
pred = model.predict(test.drop('Purchase', axis = 1))

In [None]:
compares = pred == test.Purchase

In [None]:
compares.mean()

In [None]:
print(confusion_matrix(test.Purchase, pred))

In [None]:
print('TN | FP\n--------\nFN | TP')

#### ----> Applied <----

In [None]:
df = get_rdataset('Weekly', 'ISLR', cache=True).data

In [None]:
df.head()

In [None]:
sns.pairplot(df, hue = 'Direction')

In [None]:
df.corr()

In [None]:
df.plot(y='Volume')

In [None]:
df.Direction = df.Direction.apply(lambda x: 1 if x == 'Up' else 0)

In [None]:
X = add_constant(df[['Lag1','Lag2','Lag3','Lag4','Lag5', 'Volume']])
model = dm.Logit(df['Direction'], X).fit()

In [None]:
print( model.summary() )

In [None]:
pred = model.predict(X)

In [None]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(df.Direction, np.round(pred)))

In [None]:
mat = confusion_matrix(df.Direction, np.round(pred))
print('Positive Correct Predictions:', 557/(557+48), '\nNegative Correct Predictions:', 54/(430 + 54))

In [None]:
train = df[df.Year < 2009]
test = df[df.Year >= 2009]

In [None]:
X = add_constant(train['Lag2'])
model = dm.Logit(train['Direction'], X).fit()

print( model.summary() )
pred = model.predict(add_constant(test['Lag2']))

In [None]:
print(confusion_matrix(test.Direction, np.round(pred)))

In [None]:
mat = confusion_matrix(test.Direction, np.round(pred))
print('Positive Correct Predictions:', mat[1,1]/mat[:,1].sum() ,
      '\nNegative Correct Predictions:', mat[0,0]/mat[:,0].sum())

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda, QuadraticDiscriminantAnalysis as qda

In [None]:
model = lda().fit( np.array(train.Lag2).reshape(len(train),1) , train.Direction)
pred = model.predict( np.array(test.Lag2).reshape(len(test), 1) )

print(confusion_matrix(test.Direction, np.round(pred)))

mat = confusion_matrix(test.Direction, np.round(pred))
print('\nPositive Correct Predictions:', mat[1,1]/mat[:,1].sum() ,
      '\nNegative Correct Predictions:', mat[0,0]/mat[:,0].sum())

In [None]:
model = qda().fit( np.array(train.Lag2).reshape(len(train),1) , train.Direction)
pred = model.predict( np.array(test.Lag2).reshape(len(test), 1) )

print(confusion_matrix(test.Direction, np.round(pred)))

mat = confusion_matrix(test.Direction, np.round(pred))
print('\nPositive Correct Predictions:', mat[1,1]/mat[:,1].sum() ,
      '\nNegative Correct Predictions:', mat[0,0]/mat[:,0].sum())

In [None]:
from sklearn.neighbors import KNeighborsClassifier as knn

In [None]:
# 1-nearest-neighbour classifier with Lag2 as the only predictor; sklearn
# needs a 2-D (n, 1) feature array.
lag2_train = train[['Lag2']].to_numpy()
lag2_test = test[['Lag2']].to_numpy()
model = knn(n_neighbors=1).fit(lag2_train, train['Direction'])
pred = model.predict(lag2_test)

mat = confusion_matrix(test['Direction'], np.round(pred))
print(mat)

# Share of each predicted class (one column of `mat`) that was correct.
positive_rate = mat[1,1]/mat[:,1].sum()
negative_rate = mat[0,0]/mat[:,0].sum()
print('\nPositive Correct Predictions:', positive_rate,
      '\nNegative Correct Predictions:', negative_rate)

In [None]:
df = get_rdataset('Auto', 'ISLR', cache=True).data

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
df.head()

In [None]:
df['mpg01'] = df.mpg
df.mpg01 = df.mpg.apply(lambda x: 1 if x > df.mpg.median() else 0)

In [None]:
df.head()

In [None]:
df.mpg.median()

In [None]:
df.drop('mpg', axis = 1, inplace = True)

In [None]:
sns.pairplot(df, hue = 'mpg01')

In [None]:
df.corr()

In [None]:
sns.boxplot(x = 'mpg01', y = 'weight', data = df)

In [None]:
sns.boxplot(x = 'mpg01', y = 'acceleration', data = df)

In [None]:
sns.boxplot(x = 'mpg01', y = 'cylinders', data = df)

In [None]:
sns.boxplot(x = 'mpg01', y = 'horsepower', data = df)

In [None]:
sns.boxplot(x = 'mpg01', y = 'displacement', data = df)

In [None]:
test = df.sample(frac = 0.33)

In [None]:
train = df.drop(test.index)

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda

In [None]:
train.head(1)

In [None]:
model = lda().fit(train[['cylinders', 'displacement', 'horsepower', 'weight']], train.mpg01)

pred = model.predict(test[['cylinders', 'displacement', 'horsepower', 'weight']])

In [None]:
from sklearn.metrics import confusion_matrix

mat = confusion_matrix(test.mpg01, pred)

In [None]:
print(mat)

(mat[0,1] + mat[1,0])/(mat[:,0].sum()+mat[:,1].sum())

In [None]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as qda

In [None]:
model = qda().fit(train[['cylinders', 'displacement', 'horsepower', 'weight']], train.mpg01)

pred = model.predict(test[['cylinders', 'displacement', 'horsepower', 'weight']])

mat = confusion_matrix(test.mpg01, pred)

In [None]:
print(mat)

(mat[0,1] + mat[1,0])/(mat[:,0].sum()+mat[:,1].sum())

In [None]:
X = add_constant( train[['cylinders', 'displacement', 'horsepower', 'weight']] )
model = dm.Logit(train.mpg01, X).fit()

pred = model.predict(add_constant(test[['cylinders', 'displacement', 'horsepower', 'weight']]))

In [None]:
mat = confusion_matrix(test.mpg01, pred.round())
print(mat)

(mat[0,1] + mat[1,0])/(mat[:,0].sum()+mat[:,1].sum())

In [None]:
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.preprocessing import StandardScaler

In [None]:
scaler = StandardScaler()

In [None]:
scld_df = scaler.fit_transform( train[['cylinders', 'displacement', 'horsepower', 'weight']] )

In [None]:
scld_df = pd.DataFrame(data = scld_df, columns = ['cylinders', 'displacement', 'horsepower', 'weight'])
scld_df.index = train.index

In [None]:
scld_df['mpg01'] = train.mpg01
model = knn(n_neighbors=3).fit(scld_df[['cylinders', 'displacement', 'horsepower', 'weight']], scld_df.mpg01)

In [None]:
scaled_test = scaler.fit_transform( test[['cylinders', 'displacement', 'horsepower', 'weight']] )
scaled_test = pd.DataFrame(data = scaled_test, columns = ['cylinders', 'displacement', 'horsepower', 'weight'], index = test.index)
scaled_test['mpg01'] = test.mpg01

pred = model.predict(scaled_test[['cylinders', 'displacement', 'horsepower', 'weight']])

In [None]:
mat = confusion_matrix(test.mpg01, pred.round())
print(mat)

(mat[0,1] + mat[1,0])/(mat[:,0].sum()+mat[:,1].sum())