# Importing Dependencies

In [167]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection  import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
import joblib

# Data Loading and analysis

In [168]:
# Loading the hunger dataset using pandas dataframe

data = pd.read_csv("hunger.csv")

In [169]:
# printing the first five rows of the data

data.head()

Unnamed: 0,county,population,Area_square_km,place_max_temp(deg),place_min_temp(deg),humidity(%),rainfall(mm),precipitation(%),fishing,outcome
0,Mombasa,1208333,219.9,32,24.3,64,4,6,0,1
1,Kwale,866820,8267.1,31,24.0,45,5,5,0,1
2,Kilifi,1453787,12539.7,32,24.3,56,6,8,0,1
3,Tana River,315943,37950.5,33,24.0,39,8,9,0,1
4,Lamu,143920,6253.3,35,24.0,52,5,6,0,1


In [170]:
#  Number of rows and columns of the dataset
data.shape

(50, 10)

In [171]:
# Getting the statistical measures of the data

data.describe()

Unnamed: 0,population,Area_square_km,place_max_temp(deg),place_min_temp(deg),humidity(%),rainfall(mm),precipitation(%),fishing,outcome
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,1019365.0,11774.558,30.74,19.752,51.36,22.94,13.74,0.58,0.62
std,672341.7,16987.86657,3.832248,4.292625,8.163533,15.316271,8.260578,0.498569,0.490314
min,143920.0,219.9,17.0,10.0,39.0,4.0,5.0,0.0,0.0
25%,613118.5,2530.075,28.0,17.0,42.75,7.25,9.0,0.0,0.0
50%,914376.5,3219.1,30.5,19.0,55.0,32.0,11.0,1.0,1.0
75%,1155574.0,12148.875,33.0,24.0,59.0,37.0,11.0,1.0,1.0
max,4397073.0,70944.1,38.0,25.0,64.0,40.0,28.0,1.0,1.0


In [172]:
#  checking the mean of those affected with hunger and those who have not 

data.groupby('outcome').mean()

Unnamed: 0_level_0,population,Area_square_km,place_max_temp(deg),place_min_temp(deg),humidity(%),rainfall(mm),precipitation(%),fishing
outcome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,1021735.0,5325.621053,28.105263,17.473684,52.210526,27.105263,10.631579,0.842105
1,1017913.0,15727.132258,32.354839,21.148387,50.83871,20.387097,15.645161,0.419355


In [173]:
"""
separating data data variables i.e independent variable (y) dependent 
and Dependent variables (y)
"""
x = data.drop(columns=['outcome', 'county'], axis=1)
x.head()

Unnamed: 0,population,Area_square_km,place_max_temp(deg),place_min_temp(deg),humidity(%),rainfall(mm),precipitation(%),fishing
0,1208333,219.9,32,24.3,64,4,6,0
1,866820,8267.1,31,24.0,45,5,5,0
2,1453787,12539.7,32,24.3,56,6,8,0
3,315943,37950.5,33,24.0,39,8,9,0
4,143920,6253.3,35,24.0,52,5,6,0


In [174]:
y = data['outcome']
y.head()

0    1
1    1
2    1
3    1
4    1
Name: outcome, dtype: int64

# Data Standardization

In [175]:
scaler = StandardScaler()
scaler.fit(x)

In [176]:
standard_data = scaler.transform(x)
standard_data

array([[ 0.28391274, -0.68707669,  0.33212681,  1.07024813,  1.56406898,
        -1.24914798, -0.94649322, -1.1751393 ],
       [-0.22918976, -0.2085646 ,  0.0685341 ,  0.99965129, -0.78698407,
        -1.18319508, -1.06877916, -1.1751393 ],
       [ 0.65269244,  0.04549778,  0.33212681,  1.07024813,  0.5741519 ,
        -1.11724217, -0.70192133, -1.1751393 ],
       [-1.05684893,  1.55650472,  0.59571952,  0.99965129, -1.52942188,
        -0.98533637, -0.57963538, -1.1751393 ],
       [-1.31530302, -0.32831155,  1.12290493,  0.99965129,  0.07919337,
        -1.18319508, -0.94649322, -1.1751393 ],
       [-1.01969662,  0.31975979,  1.12290493,  0.76432848, -0.78698407,
        -0.98533637, -1.06877916, -1.1751393 ],
       [-0.26745238,  1.95999211,  1.91368305,  1.23497409, -1.40568225,
        -1.18319508, -0.70192133, -1.1751393 ],
       [-0.35773395,  2.67575634,  1.38649764,  0.99965129, -1.52942188,
        -1.18319508, -0.57963538, -1.1751393 ],
       [-0.22823271,  0.84231032

In [177]:
x = standard_data
y = data['outcome']


In [178]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=2)

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(40, 8) (40,) (10, 8) (10,)


# # Trainning The model

In [179]:
classifier = svm.SVC(kernel='linear')
classifier.fit(x_train, y_train)

# Model Evaluation and Validation

In [180]:
# accuracy_score of the svm classifier

x_train_predictions = classifier.predict(x_train)

trainning_data_accuracy = accuracy_score(x_train_predictions, y_train)

print(trainning_data_accuracy)

0.95


# Making a predictive system

In [181]:
input_data = (393177,2564.4,32,18,46,35,10,1,)

# changing the inputs to numpy data_array
input_data_as_numpy_array = np.asanyarray(input_data)

# Reshape data in the array since we are predicting data of only one instance
reshaped_data = input_data_as_numpy_array.reshape(1, -1)

#  Standardize the input data
std_data = scaler.transform(reshaped_data)

print(std_data)

[[-0.94080955 -0.54766527  0.33212681 -0.41228556 -0.66324444  0.79539201
  -0.45734944  0.85096294]]




In [182]:
predictions = classifier.predict(std_data)
print(predictions)

[0]


# Creating a .sav File to save our model


In [183]:
# Naming the file as hunger_predictor

file_name = 'hunger_predictor'
saved_model = joblib.dump(classifier, file_name)

# That is the last procedure of data analysis, building prediction model, evaluating the model

In [None]:
# The Remaining part is now implementing the system with the SVM classifer
# to make prediction in real web application.