In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import pickle
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

def rfeFeature(indep_x,dep_y,n):
    """Run recursive feature elimination once per base estimator.

    Four estimators drive their own RFE pass, in this order: logistic
    regression, a linear-kernel SVC, a random forest and a decision tree.

    Parameters
    ----------
    indep_x : feature table handed to ``RFE.fit`` and ``RFE.transform``.
    dep_y : target values handed to ``RFE.fit``.
    n : forwarded to RFE as ``n_features_to_select``.

    Returns
    -------
    list
        One reduced feature matrix per estimator, in the order listed above.
    """
    estimators = [
        LogisticRegression(solver = 'lbfgs'),
        SVC(kernel = 'linear',random_state = 0),
        RandomForestClassifier(n_estimators = 10,criterion = 'entropy',random_state = 0),
        DecisionTreeClassifier(criterion = 'gini',max_features = 'sqrt',splitter = 'best',random_state = 0),
    ]
    reduced = []
    for estimator in estimators:
        print(estimator)  # echo which estimator drives the current pass
        selector = RFE(estimator=estimator, n_features_to_select=n)
        reduced.append(selector.fit(indep_x,dep_y).transform(indep_x))
    return reduced

def split_scalar(indep_x,dep_y):
    """Split the data 75/25 (fixed seed) and standardize the features.

    The scaler is fitted on the training portion only and then applied to
    both portions, so held-out rows never influence the scaling statistics.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``; the two feature matrices are
        standardized, the targets are returned as split.
    """
    train_x, test_x, train_y, test_y = train_test_split(
        indep_x, dep_y, test_size=0.25, random_state=0
    )
    scaler = StandardScaler().fit(train_x)
    return scaler.transform(train_x), scaler.transform(test_x), train_y, test_y

def cm_prediction(classifier,x_test,y_true=None):
    """Score a fitted classifier on held-out features.

    Parameters
    ----------
    classifier : fitted estimator exposing ``predict``.
    x_test : features to predict on.
    y_true : optional ground-truth labels matching ``x_test``. When omitted,
        the notebook-level ``y_test`` variable is used, which keeps the
        existing two-argument callers working. Pass it explicitly to avoid
        depending on whichever split was executed last in the kernel.

    Returns
    -------
    tuple
        ``(classifier, Accuracy, report, x_test, y_test, cm)`` where
        ``y_test`` is the label vector that was actually scored against.

    Raises
    ------
    NameError
        If ``y_true`` is omitted and no global ``y_test`` has been defined.
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    if y_true is None:
        # Legacy path: fall back to the global left behind by the last
        # split_scalar(...) unpacking in the notebook.
        y_true = y_test

    y_pred = classifier.predict(x_test)
    cm = confusion_matrix(y_true,y_pred)
    Accuracy = accuracy_score(y_true,y_pred)
    report = classification_report(y_true,y_pred)
    return classifier,Accuracy,report,x_test,y_true,cm

def logistic(x_train,y_train,x_test):
    """Fit a logistic-regression classifier and score it with ``cm_prediction``.

    Returns the six-tuple from ``cm_prediction``:
    ``(classifier, Accuracy, report, x_test, y_test, cm)``. The labels used
    for scoring are not an argument here; ``cm_prediction`` takes them from
    the notebook-level ``y_test``.
    """
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(random_state=0).fit(x_train,y_train)
    return cm_prediction(model,x_test)

def svm_linear(x_train,y_train,x_test):
    """Fit a linear-kernel SVC and score it with ``cm_prediction``.

    Returns the six-tuple from ``cm_prediction``:
    ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.svm import SVC
    model = SVC(kernel = 'linear', random_state=0).fit(x_train,y_train)
    return cm_prediction(model,x_test)

def svm_NL(x_train,y_train,x_test):
    """Fit an RBF-kernel (non-linear) SVC and score it with ``cm_prediction``.

    Returns the six-tuple from ``cm_prediction``:
    ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.svm import SVC
    model = SVC(kernel = 'rbf', random_state=0).fit(x_train,y_train)
    return cm_prediction(model,x_test)

def Naive(x_train,y_train,x_test):
    """Fit a Gaussian naive Bayes classifier and score it with ``cm_prediction``.

    Returns the six-tuple from ``cm_prediction``:
    ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.naive_bayes import GaussianNB
    model = GaussianNB().fit(x_train,y_train)
    return cm_prediction(model,x_test)

def knn(x_train,y_train,x_test):
    """Fit a 5-nearest-neighbours classifier and score it with ``cm_prediction``.

    The metric is Minkowski with ``p=2``, i.e. Euclidean distance.

    Returns the six-tuple from ``cm_prediction``:
    ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.neighbors import KNeighborsClassifier
    model = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski',p=2)
    model.fit(x_train,y_train)
    return cm_prediction(model,x_test)

def Decision(x_train,y_train,x_test):
    """Fit an entropy-criterion decision tree and score it with ``cm_prediction``.

    Returns the six-tuple from ``cm_prediction``:
    ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.tree import DecisionTreeClassifier
    model = DecisionTreeClassifier(criterion = 'entropy', random_state=0).fit(x_train,y_train)
    return cm_prediction(model,x_test)

def random(x_train,y_train,x_test):
    """Fit a 10-tree, entropy-criterion random forest and score it with ``cm_prediction``.

    Returns the six-tuple from ``cm_prediction``:
    ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.ensemble import RandomForestClassifier
    model = RandomForestClassifier(n_estimators = 10, criterion= 'entropy' ,random_state=0)
    model.fit(x_train,y_train)
    return cm_prediction(model,x_test)

def rfe_classification(acclog,accsvml,accsvmnl,accknn,accnav,accdes,accrf):
    """Arrange per-classifier accuracy lists into a comparison table.

    Rows are the four RFE base estimators (``'Logistic'``, ``'SVC'``,
    ``'Random'``, ``'DecisionTree'``); columns are the seven downstream
    classifiers. Element ``k`` of each list fills row ``k`` of that
    classifier's column.

    Parameters
    ----------
    acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf : sequence
        Accuracy values for the Logistic, SVMl, SVMnl, KNN, Naive, Decision
        and Random columns respectively. Only the first four entries of each
        are read; any further entries are ignored.

    Returns
    -------
    pandas.DataFrame
        A 4 x 7 object-dtype table of the supplied values.

    Raises
    ------
    IndexError
        If any list holds fewer than four entries.
    """
    rfe_estimators = ['Logistic','SVC','Random','DecisionTree']
    scores_by_model = {
        'Logistic': acclog,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'KNN': accknn,
        'Naive': accnav,
        'Decision': accdes,
        'Random': accrf,
    }
    rfedataframe = pd.DataFrame(index = rfe_estimators,columns = list(scores_by_model))
    for number,idex in enumerate(rfe_estimators):
        for column,scores in scores_by_model.items():
            # Write through .at in a single indexing step. The chained form
            # df[col][row] = value assigns into a temporary under pandas
            # copy-on-write and leaves the table full of NaN.
            rfedataframe.at[idex,column] = scores[number]
    return rfedataframe

In [3]:
# Load the preprocessed rainfall table and one-hot encode it
# (drop_first=True drops the first level of every encoded column).
dataset1 = pd.read_csv("preprocessed_Rainfall_dataset.csv",index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True, dtype=int)

# Target is the `rainfall_yes` indicator; every remaining column is a feature.
dep_y = df2["rainfall_yes"]
indep_x = df2.drop(columns="rainfall_yes")

In [5]:
#df2.isnull().sum()

In [7]:
# Show the encoded frame; pandas elides the middle rows in the rendered output.
df2

Unnamed: 0,day,pressure,maxtemp,temparature,mintemp,dewpoint,humidity,cloud,sunshine,winddirection,windspeed,rainfall_yes
0,1,1025.9,19.9,18.3,16.8,13.1,72,49,9.3,80.0,26.3,1
1,2,1022.0,21.7,18.9,17.2,15.6,81,83,0.6,50.0,15.3,1
2,3,1019.7,20.3,19.3,18.0,18.4,95,91,0.0,40.0,14.2,1
3,4,1018.9,22.3,20.6,19.1,18.8,90,88,1.0,50.0,16.9,1
4,5,1015.9,21.3,20.7,20.2,19.9,95,81,0.0,40.0,13.7,1
...,...,...,...,...,...,...,...,...,...,...,...,...
361,27,1022.7,18.8,17.7,16.9,15.0,84,90,0.0,30.0,18.4,1
362,28,1026.6,18.6,17.3,16.3,12.8,75,85,1.0,20.0,25.9,1
363,29,1025.9,18.9,17.7,16.4,13.3,75,78,4.6,70.0,33.4,1
364,30,1025.3,19.2,17.3,15.2,13.3,78,86,1.2,20.0,20.9,1


In [39]:
# Keep 3 features per RFE base estimator; rfeFeature returns one reduced
# feature matrix per estimator.
rfelist = rfeFeature(indep_x,dep_y,3)

# One accuracy accumulator per downstream classifier (seven distinct lists),
# filled by the evaluation loop.
acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf = ([] for _ in range(7))

LogisticRegression()
SVC(kernel='linear', random_state=0)
RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)
DecisionTreeClassifier(max_features='sqrt', random_state=0)


In [41]:
# Reset the accumulators here so this cell can be re-run on its own.
# Without the reset a second run appends after the first batch, and
# rfe_classification only reads the first four entries of each list, so the
# table would silently keep showing the old scores.
acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf = ([] for _ in range(7))

# (trainer, accumulator) pairs, in the column order of rfe_classification.
evaluations = [
    (logistic, acclog),
    (svm_linear, accsvml),
    (svm_NL, accsvmnl),
    (knn, accknn),
    (Naive, accnav),
    (Decision, accdes),
    (random, accrf),
]

for i in rfelist:
    # This unpacking also rebinds the notebook-level y_test that
    # cm_prediction reads when scoring.
    x_train,x_test,y_train,y_test = split_scalar(i,dep_y)
    for trainer,scores in evaluations:
        classifier,Accuracy,report,x_test,y_test,cm = trainer(x_train,y_train,x_test)
        scores.append(Accuracy)

# After the loop `classifier` is the random forest fitted on the last entry
# of rfelist; later cells pickle that object.
result = rfe_classification(acclog,accsvml,accsvmnl,accknn,accnav,accdes,accrf)

In [43]:
# Accuracy table: rows are the RFE base estimators, columns the downstream classifiers.
result
# 3 = number of features RFE was asked to keep for this run (the rfeFeature(..., 3) call).

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
Logistic,0.717391,0.706522,0.706522,0.663043,0.663043,0.641304,0.673913
SVC,0.836957,0.836957,0.815217,0.804348,0.847826,0.706522,0.858696
Random,0.836957,0.847826,0.815217,0.793478,0.793478,0.73913,0.75
DecisionTree,0.836957,0.858696,0.782609,0.836957,0.793478,0.728261,0.815217


In [25]:
# NOTE(review): presumably the same accuracy table from a run keeping 4 RFE features.
# The cell that produced it is not shown and its execution count (25) predates
# the cells above. Confirm before relying on these numbers.
result
#4

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
Logistic,0.706522,0.695652,0.706522,0.684783,0.641304,0.684783,0.706522
SVC,0.836957,0.836957,0.815217,0.793478,0.836957,0.771739,0.804348
Random,0.815217,0.847826,0.815217,0.826087,0.804348,0.73913,0.782609
DecisionTree,0.847826,0.858696,0.782609,0.793478,0.793478,0.728261,0.815217


In [31]:
# NOTE(review): presumably the same accuracy table from a run keeping 5 RFE features.
# The cell that produced it is not shown and its execution count (31) predates
# the cells above. Confirm before relying on these numbers.
result
#5

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
Logistic,0.804348,0.804348,0.804348,0.76087,0.771739,0.717391,0.73913
SVC,0.847826,0.847826,0.826087,0.815217,0.804348,0.75,0.771739
Random,0.815217,0.847826,0.804348,0.782609,0.804348,0.695652,0.836957
DecisionTree,0.847826,0.847826,0.793478,0.826087,0.793478,0.75,0.847826


In [37]:
# NOTE(review): presumably the same accuracy table from a run keeping 6 RFE features.
# The cell that produced it is not shown and its execution count (37) predates
# the cells above. Confirm before relying on these numbers.
result
#6

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
Logistic,0.826087,0.836957,0.826087,0.836957,0.815217,0.695652,0.782609
SVC,0.815217,0.826087,0.847826,0.815217,0.804348,0.76087,0.815217
Random,0.804348,0.836957,0.804348,0.793478,0.793478,0.73913,0.804348
DecisionTree,0.847826,0.847826,0.793478,0.804348,0.782609,0.73913,0.804348


In [45]:
import pickle

In [47]:
# Destination for the pickled model. The "finalaized" spelling is load-bearing:
# the load cell hard-codes the same string, so change both together.
filename="finalaized_model_RandomForestClassifier.sav"

In [49]:
# `classifier` is whatever the evaluation loop bound last: the random forest
# fitted on the final entry of rfelist. The StandardScaler created inside
# split_scalar is not saved with it.
# The context manager closes (and flushes) the file handle deterministically.
with open(filename,'wb') as model_file:
    pickle.dump(classifier,model_file)

In [51]:
# Only unpickle files you created yourself: pickle.load can execute arbitrary code.
# The context manager closes the file handle as soon as loading finishes.
with open("finalaized_model_RandomForestClassifier.sav",'rb') as model_file:
    loaded_model=pickle.load(model_file)

In [53]:
# NOTE(review): the pickled model was fitted on StandardScaler-transformed
# features (see split_scalar), but this sample is passed in unscaled, and the
# scaler is not available outside split_scalar. The notebook also does not
# record which three RFE-selected columns these values stand for. Treat the
# prediction as unreliable until the same scaling is applied.
# This also rebinds `result`, which previously held the accuracy table.
result = loaded_model.predict([[11,34,25]])

In [55]:
# Predicted class for the single hand-entered sample; `result` no longer
# refers to the accuracy table at this point.
result

array([0])