In [21]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier,VotingRegressor
from sklearn.metrics import accuracy_score,r2_score

# Classifications 

In [112]:
data = pd.read_csv('datasets/Credit Default.csv')
data.sample(4)

Unnamed: 0,Income,Age,Loan,Loan to Income,Default
303,28010.19093,55.3669,3971.155479,0.141775,0
1122,23626.72679,34.293353,2173.76769,0.092005,0
1339,51980.35954,35.415703,6243.04503,0.120104,0
973,30578.02016,55.366162,3010.35024,0.098448,0


In [113]:
data.shape

(2000, 5)

In [114]:
data.isnull().sum()

Income            0
Age               0
Loan              0
Loan to Income    0
Default           0
dtype: int64

In [115]:
#encoder = LabelEncoder()
#data['class'] = encoder.fit_transform(data['class'])

In [116]:
data

Unnamed: 0,Income,Age,Loan,Loan to Income,Default
0,66155.92510,59.017015,8106.532131,0.122537,0
1,34415.15397,48.117153,6564.745018,0.190752,0
2,57317.17006,63.108049,8020.953296,0.139940,0
3,42709.53420,45.751972,6103.642260,0.142911,0
4,66952.68885,18.584336,8770.099235,0.130990,1
...,...,...,...,...,...
1995,59221.04487,48.518179,1926.729397,0.032535,0
1996,69516.12757,23.162104,3503.176156,0.050394,0
1997,44311.44926,28.017167,5522.786693,0.124636,1
1998,43756.05660,63.971796,1622.722598,0.037086,0


In [117]:
x = data.iloc[:,0:4]
y = data.iloc[:,-1:]
print(x.shape)
print(y.shape)

(2000, 4)
(2000, 1)


# With different ML Algorithm 

In [118]:
clf1 = LogisticRegression()
clf2 = DecisionTreeClassifier()
clf3 = KNeighborsClassifier()

In [122]:
est = [('LR',clf1),('DTC',clf1),('KNC',clf1)]
for estims in est:
    sd = cross_val_score(estims[1],x,y,cv=10,scoring='accuracy')
    print(estims[0],np.round(np.mean(sd),2))

LR 0.92
DTC 0.92
KNC 0.92


In [123]:
est = [('LR',clf1),('DTC',clf1),('KNC',clf1)]
vc = VotingClassifier(estimators=est,voting='hard')
cross = cross_val_score(vc,x,y,cv=10,scoring='accuracy')
print(np.round(np.mean(cross),2))

0.92


In [124]:
fitt = vc.fit(x,y)
ypred = vc.predict(x)

print("Accuracy = ",accuracy_score(ypred,y))

Accuracy =  0.913


# with same ML Algorithm

In [125]:
svc1 = SVC(probability=True,kernel='poly',degree=2)
svc2 = SVC(probability=True,kernel='rbf',degree = 3)
svc3 = SVC(probability=True,kernel='sigmoid',degree = 2)


In [126]:
ewq = [('svc1',svc1),('svc2',svc2),('svc3',svc3)]
for estimators in ewq:
    sd = cross_val_score(estimators[1],x,y,cv=10,scoring='accuracy')
    print(estimators[0],np.round(np.mean(sd),2))

svc1 0.86
svc2 0.86
svc3 0.79


In [127]:
vc2 = VotingClassifier(estimators=ewq,voting='hard')
cross2 = cross_val_score(vc2,x,y,cv =10,scoring='accuracy')
print(np.round(np.mean(cross2),3))

0.858


In [128]:
fittw = vc.fit(x,y)
ypred1 = vc.predict(x)
print("accuracy = ",accuracy_score(ypred,y))

accuracy =  0.913


# Regression

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import SGDRegressor
from sklearn.ensemble import VotingRegressor

In [7]:
data1 = pd.read_csv('datasets/data_house.csv',usecols=['bedrooms','bathrooms','sqft_living','floors','sqft_above','sqft_basement','price'])
data1.shape

(21613, 7)

In [13]:
x = data1.iloc[:,1:]
y = data1.iloc[:,0:1]

In [30]:
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size=0.2,random_state=42)
print("xtrain shape = ",xtrain.shape)
print("xtest shape = ",xtest.shape)
print("ytrain shape = ",ytrain.shape)
print("ytest shape = ",ytest.shape)

xtrain shape =  (17290, 6)
xtest shape =  (4323, 6)
ytrain shape =  (17290, 1)
ytest shape =  (4323, 1)


# With Different Models

In [31]:
lr = LinearRegression()
dtr = DecisionTreeRegressor()
sgdr = SGDRegressor()
svr = SVR()

In [32]:
models = []
models.append(('LR',lr))
models.append(('DTR',dtr))
models.append(('SGDR',sgdr))
models.append(('SVR',svr))
models

[('LR', LinearRegression()),
 ('DTR', DecisionTreeRegressor()),
 ('SGDR', SGDRegressor()),
 ('SVR', SVR())]

In [33]:
for est in models:
    score = cross_val_score(est[1],xtrain,ytrain,scoring='r2',cv=10)
    print(est[0],np.round(np.mean(score),3))

LR 0.504
DTR 0.209
SGDR -3.552970413600578e+19
SVR -0.055


In [34]:
vr = VotingRegressor(estimators=models)
scoring1 = cross_val_score(vr,xtrain,ytrain,scoring='r2',cv=10)
print("Voting Regressor R2 score = ",np.round(np.mean(scoring1),3))

Voting Regressor R2 score =  -1.0042188146215574e+18


In [35]:
vr.fit(xtrain,ytrain)
ypredd = vr.predict(xtest)
print("R2 Score = ",r2_score(ypredd,ytest))

R2 Score =  -4.3664363709390335


# With same Model

In [36]:
lr1 = LinearRegression()
lr2 = LinearRegression()
lr3 = LinearRegression()
lr4 = LinearRegression()

In [37]:
model1 = []
model1.append(('lr1',lr1))
model1.append(('lr2',lr2))
model1.append(('lr3',lr3))
model1.append(('lr4',lr4))
model1

[('lr1', LinearRegression()),
 ('lr2', LinearRegression()),
 ('lr3', LinearRegression()),
 ('lr4', LinearRegression())]

In [38]:
for esw in model1:
    scorr2 = cross_val_score(esw[1],xtrain,ytrain,scoring='r2',cv=10)
    print(esw[0],np.round(np.mean(scorr2),3))

lr1 0.504
lr2 0.504
lr3 0.504
lr4 0.504


In [39]:
vr2 = VotingRegressor(estimators=model1)
crod=cross_val_score(vr2,xtrain,ytrain,cv=10,scoring='r2')
print("Voting Regressor R2 score = ",np.round(np.mean(crod),3))

Voting Regressor R2 score =  0.504


In [40]:
vr2.fit(xtrain,ytrain)
ypredd1 = vr2.predict(xtest)
print("R2 Score = ",r2_score(ypredd,ytest))

R2 Score =  -4.3664363709390335
