In [7]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score

In [9]:
# Location of the forest-fires CSV. Set the FORESTFIRES_CSV environment variable
# to run the notebook on another machine; when it is unset the original
# hard-coded location is used, so existing behaviour is unchanged.
DATA_PATH = os.environ.get("FORESTFIRES_CSV", "C:/Users/ARAVINDH/Downloads/forestfires.csv")

# index_col=False tells pandas not to use the first CSV column as the row index.
df = pd.read_csv(DATA_PATH, index_col=False)

In [10]:
# Preview the first three rows of the loaded frame.
df.head(3)

Unnamed: 0,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep,size_category
0,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,...,0,0,0,0,1,0,0,0,0,small
1,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,...,0,0,0,0,0,0,0,1,0,small
2,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,...,0,0,0,0,0,0,0,1,0,small


In [11]:
# (rows, columns) of the loaded frame.
df.shape

(517, 31)

In [12]:
# Replace the month-name strings with integer codes and show the distinct codes.
# NOTE(review): the frame also has monthapr..monthsep indicator columns, so this
# integer code is an additional feature on top of them - confirm it is wanted.
label_encoder = preprocessing.LabelEncoder()
month_codes = label_encoder.fit_transform(df['month'])
df['month'] = month_codes
df['month'].unique()

array([ 7, 10,  1, 11,  0,  6,  5,  3,  4,  2,  8,  9])

In [14]:
# Re-fit the existing encoder on the weekday names, store the integer codes,
# and show the distinct codes.
day_codes = label_encoder.fit_transform(df['day'])
df['day'] = day_codes
df['day'].unique()

array([0, 5, 2, 3, 1, 6, 4])

In [21]:
# List each column's dtype; size_category is the only non-numeric column left
# once month and day have been label-encoded.
df.dtypes

month              int32
day                int32
FFMC             float64
DMC              float64
DC               float64
ISI              float64
temp             float64
RH                 int64
wind             float64
rain             float64
area             float64
dayfri             int64
daymon             int64
daysat             int64
daysun             int64
daythu             int64
daytue             int64
daywed             int64
monthapr           int64
monthaug           int64
monthdec           int64
monthfeb           int64
monthjan           int64
monthjul           int64
monthjun           int64
monthmar           int64
monthmay           int64
monthnov           int64
monthoct           int64
monthsep           int64
size_category     object
dtype: object

In [None]:
#TRAIN AND TEST SPLIT

In [17]:
# Hold out a quarter of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): no `stratify` argument is passed, so the class mix of
# size_category in each split is left to chance.
train, test = train_test_split(df, random_state=0, test_size=0.25)

In [18]:
# (rows, columns) of the held-out test split.
test.shape

(130, 31)

In [19]:
# (rows, columns) of the training split.
train.shape

(387, 31)

In [23]:
# Split each frame into predictors (every column but the last) and the target
# (the last column, size_category).
# NOTE(review): `area` stays among the predictors; if size_category is derived
# from area this leaks the target into the features - confirm against the data source.
x_train, y_train = train.iloc[:, :-1], train.iloc[:, -1]
x_test, y_test = test.iloc[:, :-1], test.iloc[:, -1]

In [None]:
#Feature scaling

In [24]:
# Learn the scaling statistics from the training predictors only, then apply
# the same transformation to both splits so the test set does not influence the fit.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [None]:
#SVC
#Linear Kernel

In [25]:
# Linear-kernel SVC: train on the scaled training predictors, then predict the test split.
model = SVC(kernel="linear").fit(x_train, y_train)
pred_y = model.predict(x_test)

In [26]:
# Fraction of test rows whose linear-kernel prediction equals the true label.
(pred_y == y_test).mean()

0.9307692307692308

In [None]:
#POLY Kernel

In [27]:
# Polynomial-kernel SVC with default settings; `model` is rebound to this fit.
model = SVC(kernel="poly").fit(x_train, y_train)
pred_y1 = model.predict(x_test)

In [28]:
# Fraction of test rows whose polynomial-kernel prediction equals the true label.
(pred_y1 == y_test).mean()

0.6923076923076923

In [None]:
#RBF Kernel

In [29]:
# RBF-kernel SVC with default settings; `model` is rebound to this fit.
model = SVC(kernel="rbf").fit(x_train, y_train)
pred_y2 = model.predict(x_test)

In [31]:
# Fraction of test rows whose RBF-kernel prediction equals the true label.
(pred_y2 == y_test).mean()

0.7769230769230769

In [32]:
# Sanity check before the grid search: predictor and target shapes for both splits.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((387, 30), (387,), (130, 30), (130,))

In [None]:
#Grid search Cross validation

In [36]:
# 10-fold cross-validated grid search over C and gamma for an RBF-kernel SVC,
# fitted on the scaled training split. Candidate lists keep their original order.
gamma_values = [50, 5, 10, 0.5, 0.1]
c_values = [25, 22, 20, 18, 16, 15, 14, 13, 12, 11, 10, 0.1, 0.001]

clf = SVC()
param_grid = [{'kernel': ['rbf'], 'gamma': gamma_values, 'C': c_values}]
gsv = GridSearchCV(estimator=clf, param_grid=param_grid, cv=10)
gsv.fit(x_train, y_train)

GridSearchCV(cv=10, estimator=SVC(),
             param_grid=[{'C': [25, 22, 20, 18, 16, 15, 14, 13, 12, 11, 10, 0.1,
                                0.001],
                          'gamma': [50, 5, 10, 0.5, 0.1], 'kernel': ['rbf']}])

In [37]:
# Winning parameter combination from the grid search and its cross-validation score.
gsv.best_params_ ,gsv.best_score_ 

({'C': 22, 'gamma': 0.1, 'kernel': 'rbf'}, 0.8189608636977057)

In [38]:
# Train the final classifier with the parameters the grid search selected.
# Reading them from gsv.best_params_ (rather than copying the printed numbers
# by hand) keeps this cell in step with the search if the grid or data changes.
clf = SVC(**gsv.best_params_)
clf.fit(x_train, y_train)

# Predictions on the held-out test split, used by the evaluation cells.
y_pred = clf.predict(x_test)

In [39]:
# Test-set accuracy of the tuned classifier, expressed as a percentage.
accuracy = accuracy_score(y_test, y_pred) * 100
accuracy

76.92307692307693

In [None]:
#Confusion matrix

In [40]:
# Cross-tabulate true vs. predicted size_category on the test split for the tuned
# classifier (scikit-learn convention: rows are true classes, columns are predictions).
confusion_matrix(y_test,y_pred)

array([[19, 18],
       [12, 81]], dtype=int64)