In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

In [2]:
df = pd.read_csv("winequality-red.csv", sep=";")
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [3]:
x = df.drop(columns=["quality"])
y = df.quality

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=20)

In [5]:
svc = SVC()

In [6]:
svc.fit(x_train, y_train)

SVC()

In [7]:
svc.score(x_test, y_test)

0.5

In [8]:
param = {
    "kernel": ["linear", "poly", "rbf", "sigmoid"],
    "C": [0.1, 1, 10, 100],
#     "gamma": ["scale", "auto"]
}
svc_grid = GridSearchCV(estimator=SVC(), param_grid=param, verbose=3)

In [9]:
svc_grid.fit(x_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END ..............C=0.1, kernel=linear;, score=0.600 total time=   0.2s
[CV 2/5] END ..............C=0.1, kernel=linear;, score=0.525 total time=   0.3s
[CV 3/5] END ..............C=0.1, kernel=linear;, score=0.575 total time=   0.2s
[CV 4/5] END ..............C=0.1, kernel=linear;, score=0.600 total time=   0.2s
[CV 5/5] END ..............C=0.1, kernel=linear;, score=0.548 total time=   0.3s
[CV 1/5] END ................C=0.1, kernel=poly;, score=0.471 total time=   0.0s
[CV 2/5] END ................C=0.1, kernel=poly;, score=0.471 total time=   0.0s
[CV 3/5] END ................C=0.1, kernel=poly;, score=0.458 total time=   0.0s
[CV 4/5] END ................C=0.1, kernel=poly;, score=0.475 total time=   0.0s
[CV 5/5] END ................C=0.1, kernel=poly;, score=0.498 total time=   0.0s
[CV 1/5] END .................C=0.1, kernel=rbf;, score=0.521 total time=   0.0s
[CV 2/5] END .................C=0.1, kernel=rbf;

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid']},
             verbose=3)

In [13]:
best_svc_grid = svc_grid.best_estimator_
best_svc_grid

SVC(C=100)

In [14]:
best_svc_grid.score(x_test, y_test)

0.58

In [15]:
best_svc_grid.score(x_train, y_train)

0.5904920767306089

In [8]:
df = pd.read_csv("Admission_Prediction.csv")
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337.0,118.0,4.0,4.5,4.5,9.65,1,0.92
1,2,324.0,107.0,4.0,4.0,4.5,8.87,1,0.76
2,3,,104.0,3.0,3.0,3.5,8.0,1,0.72
3,4,322.0,110.0,3.0,3.5,2.5,8.67,1,0.8
4,5,314.0,103.0,2.0,2.0,3.0,8.21,0,0.65


In [9]:
df.isna().sum()

Serial No.            0
GRE Score            15
TOEFL Score          10
University Rating    15
SOP                   0
LOR                   0
CGPA                  0
Research              0
Chance of Admit       0
dtype: int64

In [10]:
df["GRE Score"].fillna(df["GRE Score"].mean(), inplace=True)
df["TOEFL Score"].fillna(df["TOEFL Score"].mean(), inplace=True)
df["University Rating"].fillna(df["University Rating"].mean(), inplace=True)

In [11]:
x = df.drop(columns=["Serial No.", "Chance of Admit"])
y = df["Chance of Admit"]

In [12]:
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=30, test_size=0.20)

In [13]:
from sklearn.svm import SVR

In [14]:
svr = SVR()

In [15]:
svr.fit(x_train, y_train)

SVR()

In [16]:
from sklearn.metrics import r2_score

In [38]:
y_pred = svr.predict(x_test)

In [39]:
r2_score(y_test, y_pred)

0.6774861142346673

In [47]:
svr.predict([x_test.iloc[0]])



array([0.79688575])

In [48]:
y_test.iloc[0]list

0.7

In [49]:
svr.score(x_test, y_test)

0.6774861142346673

# Stacking

In [17]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [18]:
df = pd.read_csv("winequality-red.csv", sep=";")

In [19]:
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [20]:
x = df.drop(["quality"], axis=1)
y = df.quality

In [21]:
x.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4


In [75]:
train, val_test_x, test, val_test_y = train_test_split(x, y, train_size=0.5, random_state=30) 

In [76]:
x_train, x_test, y_train, y_test = train_test_split(train, test, random_state=30, test_size=0.2)

In [77]:
knn = KNeighborsClassifier()
knn.fit(x_train, y_train)

KNeighborsClassifier()

In [78]:
knn.score(x_test, y_test)

0.54375

In [79]:
svc = SVC()
svc.fit(x_train, y_train)

SVC()

In [80]:
svc.score(x_test, y_test)

0.4875

In [81]:
prediction_knn = knn.predict(val_test_x)
prediction_svc = svc.predict(val_test_x)

In [82]:
prediction_knn

array([7, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 6, 5, 5, 6, 5, 5, 6,
       5, 5, 6, 6, 6, 5, 5, 6, 5, 6, 6, 5, 6, 6, 6, 6, 5, 7, 6, 6, 5, 4,
       6, 7, 6, 6, 5, 6, 5, 5, 5, 6, 5, 5, 6, 6, 5, 5, 6, 5, 5, 6, 5, 5,
       6, 6, 5, 4, 5, 5, 5, 6, 6, 6, 7, 5, 5, 6, 6, 6, 7, 6, 5, 5, 5, 6,
       5, 5, 5, 7, 6, 7, 6, 7, 5, 6, 5, 5, 5, 6, 5, 7, 6, 5, 6, 5, 7, 5,
       5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 5, 5, 6, 5, 5, 6, 6, 5,
       5, 6, 7, 5, 5, 5, 6, 6, 5, 5, 6, 5, 5, 5, 5, 5, 6, 7, 5, 5, 5, 5,
       6, 5, 6, 5, 4, 5, 5, 6, 6, 5, 6, 5, 6, 6, 6, 6, 6, 5, 6, 5, 6, 5,
       6, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 6, 5, 7, 5, 6, 5,
       5, 6, 6, 5, 6, 5, 5, 6, 6, 6, 6, 5, 6, 6, 5, 6, 5, 4, 5, 6, 7, 5,
       6, 6, 5, 6, 6, 6, 6, 6, 6, 5, 5, 5, 6, 5, 5, 6, 5, 5, 5, 6, 5, 6,
       6, 6, 5, 6, 6, 5, 6, 5, 5, 6, 5, 6, 6, 5, 6, 6, 5, 6, 6, 5, 6, 6,
       5, 5, 6, 6, 6, 6, 5, 6, 6, 5, 5, 6, 5, 6, 5, 6, 5, 6, 6, 5, 6, 6,
       7, 5, 6, 7, 5, 4, 6, 7, 5, 5, 6, 5, 5, 6, 5,

In [83]:
prediction_svc

array([6, 6, 6, 6, 6, 5, 6, 5, 5, 5, 6, 5, 6, 6, 5, 6, 6, 5, 6, 5, 6, 6,
       6, 5, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 6, 5, 5, 6, 5, 6, 6, 5, 5,
       6, 6, 5, 6, 5, 5, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 6, 6, 5, 6, 5, 6,
       5, 6, 5, 6, 5, 6, 6, 6, 5, 6, 5, 6, 5, 6, 6, 6, 6, 6, 6, 5, 6, 5,
       5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 5, 5, 5,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 6, 5, 6, 6, 6, 6, 5, 5, 6, 6,
       6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 6, 5, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6,
       5, 5, 6, 6, 5, 6, 6, 5, 5, 6, 6, 6, 5, 6, 5, 6, 6, 5, 6, 6, 6, 5,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 6, 6, 5,
       6, 6, 5, 6, 6, 6, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 5, 6,
       6, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 6, 6, 5, 6, 6,
       6, 5, 6, 5, 6, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6,
       6, 6, 5, 6, 5, 6, 6, 6, 6, 6, 5, 5, 5, 6, 5,

In [85]:
input3 = np.column_stack((prediction_knn, prediction_svc))
output = val_test_y

In [86]:
output

1147    7
659     4
871     5
1333    5
1411    6
       ..
1073    6
200     7
942     7
1106    6
1329    6
Name: quality, Length: 800, dtype: int64

In [87]:
rf = RandomForestClassifier()
rf.fit(input3, output)

RandomForestClassifier()

In [88]:
rf.score(x_test, y_test)



ValueError: X has 11 features, but RandomForestClassifier is expecting 2 features as input.

In [91]:
knn_output = knn.predict(x_test)
svc_output = svc.predict(x_test)
rf_input = np.column_stack((knn_output, svc_output))

In [93]:
rf.score(rf_input, y_test)

0.55

In [94]:
rf.predict(rf_input)

array([5, 6, 5, 5, 6, 6, 5, 6, 6, 5, 6, 5, 5, 5, 6, 6, 6, 5, 5, 6, 5, 6,
       6, 5, 5, 5, 6, 5, 5, 5, 6, 5, 6, 5, 6, 6, 5, 5, 6, 5, 5, 6, 5, 5,
       6, 6, 5, 5, 6, 5, 6, 5, 6, 5, 5, 6, 6, 6, 6, 5, 5, 5, 5, 6, 6, 5,
       5, 5, 6, 5, 5, 5, 5, 6, 6, 6, 6, 5, 5, 5, 6, 6, 5, 6, 5, 6, 6, 5,
       6, 6, 5, 5, 6, 5, 5, 6, 6, 5, 5, 5, 5, 6, 6, 5, 6, 5, 5, 6, 6, 5,
       5, 6, 6, 5, 5, 5, 5, 6, 5, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 6, 5,
       6, 6, 5, 6, 6, 5, 5, 6, 5, 5, 5, 6, 6, 5, 5, 6, 5, 6, 6, 5, 6, 6,
       5, 5, 5, 6, 5, 5])