In [0]:
!pip install -q sklearn

In [0]:
!pip install git+https://github.com/tensorflow/docs
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import os
try:
  # The %tensorflow_version magic only works in colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf
from tensorflow import keras
from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [0]:
# Mount Google Drive at /content/drive so the dataset CSVs read below are reachable.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Path of the training CSV on the mounted Drive; adjust it to your own directory structure.
URL = '/content/drive/My Drive/Dataset/flipr covid19 dataset/Train_dataset.csv' 
dataframe = pd.read_csv(URL)
dataframe.head()

In [0]:
# Path of the test CSV on the mounted Drive; adjust it to your own directory structure.
testUrl = '/content/drive/My Drive/Dataset/flipr covid19 dataset/Test_dataset.csv' 
testDataframe = pd.read_csv(testUrl)
testDataframe.head()

In [0]:
# Keep the test-set IDs aside: cleanData() drops 'people_ID', and the IDs are
# needed again when the submission table is assembled.
peopleId= testDataframe['people_ID']

In [0]:
peopleId.head()

In [0]:
# `df` is a second name for the same object as `dataframe` (no copy is made).
df = dataframe
# Only the last expression of a cell is rendered, so the dtypes overview goes last.
dataframe.dtypes

In [0]:
def cleanData(dataframe):
  """Turn a raw survey frame into the numeric feature table used by the model.

  The categorical columns are one-hot encoded with ``pd.get_dummies`` and the
  original categorical columns, ``Region`` and the identifying / unused
  columns are removed. Every other column of the input (for example the
  ``Infect_Prob`` label when it is present) is passed through unchanged.

  The input frame is left untouched, so the function can be called repeatedly
  on the same raw frame without side effects.

  Args:
    dataframe: pandas DataFrame containing at least the columns ``Gender``,
      ``Occupation``, ``Mode_transport``, ``comorbidity``, ``Pulmonary score``,
      ``cardiological pressure``, ``Region``, ``Designation``, ``Name``,
      ``Married``, ``Children``, ``Insurance``, ``salary`` and ``people_ID``.

  Returns:
    A new DataFrame: the dummy columns first (Gender, Occupation,
    Mode_transport, comorbidity, Pulmonary_score, cardiological_pressure, in
    that order), followed by the remaining input columns in their original
    order.

  Raises:
    KeyError: if one of the required columns is missing.
  """
  # (source column, prefix of the generated dummy columns)
  categorical = [
      ('Gender', 'Gender'),
      ('Occupation', 'Occupation'),
      ('Mode_transport', 'Mode_transport'),
      ('comorbidity', 'comorbidity'),
      ('Pulmonary score', 'Pulmonary_score'),
      ('cardiological pressure', 'cardiological_pressure'),
  ]
  dummies = [
      pd.get_dummies(dataframe[column], prefix=prefix)
      for column, prefix in categorical
  ]

  not_features = ['Region', 'Designation', 'Name', 'Married', 'Children',
                  'Insurance', 'salary', 'people_ID']
  encoded_sources = [column for column, _ in categorical]
  # drop() returns a new frame, so the caller's object is never modified.
  remaining = dataframe.drop(columns=encoded_sources + not_features)
  # A frame that went through the earlier, mutating version of this function
  # may still carry these renamed copies; they are not features either.
  remaining = remaining.drop(
      columns=['Pulmonary_score', 'cardiological_pressure'], errors='ignore')

  return pd.concat(dummies + [remaining], axis=1)

In [0]:
# Clean the test set with the same function as the training set.
# NOTE(review): no dropna() is ever applied to testData, and its dummy columns
# depend on the categories present in the test CSV - confirm that they line up
# with the training feature columns before predicting.
testData = cleanData(testDataframe)

In [0]:
testData.shape

In [0]:
testData.head()

In [0]:
df2 = dataframe
df2.head()

In [0]:
df2 = cleanData(df2)

In [0]:
df2.head()

In [0]:
# Handling missing values: count the NaNs in each column.
df2.isna().sum()

In [0]:
# To keep things simple initially, drop every ROW that contains a missing value
# (dropna() with its default arguments removes rows, not columns).
df2 = df2.dropna() 
print('after dropping all na')
df2.isna().sum()

In [0]:
# Random 90% sample for training (fixed seed so the split is repeatable);
# the rows that were not sampled form the hold-out set.
train_dataset = df2.sample(frac=0.9,random_state=0)
test_dataset = df2.drop(train_dataset.index)
print(len(train_dataset), 'train examples')
# The hold-out set is named test_dataset in code but reported as "validation" here.
print(len(test_dataset), 'validation examples')

In [0]:
# Per-feature summary statistics of the training rows, one row per feature.
# The label column is removed because it is not an input to the model.
train_stats = (
    train_dataset
    .describe()
    .drop(columns="Infect_Prob")
    .transpose()
)
train_stats

In [0]:
# pop() removes the target column from each feature frame in place and returns
# it, so after this cell the frames hold features only.
train_labels = train_dataset.pop('Infect_Prob')
test_labels = test_dataset.pop('Infect_Prob')

In [0]:
train_labels.head()

In [0]:
def norm(x):
  """Standardize `x` with the 'mean' and 'std' columns of `train_stats`.

  Both splits are scaled with the statistics of the training rows.
  """
  centered = x - train_stats['mean']
  return centered / train_stats['std']
normed_train_data, normed_test_data = norm(train_dataset), norm(test_dataset)

In [0]:
normed_train_data.head()

In [0]:
def build_model(input_dim=None, learning_rate=0.0005):
  """Create and compile the dense regression network.

  The network is three ReLU layers (64, 32 and 16 units) followed by a single
  linear output unit. It is compiled with an RMSprop optimizer and MAE loss,
  and reports MAE and MSE as metrics.

  Args:
    input_dim: number of input features. When omitted, the number of columns
      of the notebook-level `normed_train_data` frame is used, which is what
      this function always did before the parameter existed.
    learning_rate: learning rate handed to RMSprop.

  Returns:
    The compiled `keras.Sequential` model.
  """
  if input_dim is None:
    # Fall back to the global frame so that a bare build_model() still works.
    input_dim = len(normed_train_data.keys())

  model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=[input_dim]),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1)
  ])

  optimizer = tf.keras.optimizers.RMSprop(learning_rate)

  model.compile(loss='mae',
                optimizer=optimizer,
                metrics=['mae', 'mse'])
  return model

In [0]:
model = build_model()

In [0]:
model.summary()

In [0]:
# Smoke test: run the still-untrained model on the first 10 normalized
# training rows to check that the input width matches.
example_batch = normed_train_data[:10]
example_result = model.predict(example_batch)
example_result

In [0]:
# Train for EPOCHS epochs with 20% of the rows passed in held out for
# validation; verbose=0 silences Keras and the EpochDots callback reports progress.
# NOTE(review): this fits on train_dataset, not on normed_train_data computed
# earlier. The later predict() calls also use un-normalized frames, so the
# pipeline is self-consistent - confirm that skipping normalization is intended.
EPOCHS = 500
history = model.fit(
  train_dataset, train_labels,
  epochs=EPOCHS, validation_split = 0.2, verbose=0,
  callbacks=[tfdocs.modeling.EpochDots()])

In [0]:
import matplotlib.pyplot as plt

In [0]:
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [0]:
# Plot the MAE recorded in `history`; the y-axis is clipped to the 0-10 range.
plotter.plot({'Basic': history}, metric = "mae")
plt.ylim([0, 10])
plt.ylabel('MAE [Infect_Prob]')

In [0]:
# Plot the MSE recorded in `history`; the y-axis is clipped to the 0-200 range.
plotter.plot({'Basic': history}, metric = "mse")
plt.ylim([0, 200])
plt.ylabel('MSE [Infect_prob^2]')

In [0]:
# Predict on the hold-out rows and compare against the true labels.
# NOTE(review): test_dataset is the un-normalized frame, matching what fit()
# was given; normed_test_data is not used here - confirm this is intended.
test_predictions = model.predict(test_dataset).flatten()
a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [Infect_prob]')
plt.ylabel('Predictions [Infect_prob]')
# Both axes share the fixed 0-100 range; the diagonal marks perfect predictions.
lims = [0, 100]
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims, lims)

In [0]:
# Distribution of the hold-out errors (prediction minus true label).
error = test_predictions - test_labels
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error [Infect_prob]")
_ = plt.ylabel("Count")

In [0]:
# Predictions for the submission rows.
# NOTE(review): testData is passed as cleanData() returned it - no dropna() and
# no normalization - so missing values or a column set that differs from the
# training features would flow straight into the model; verify before submitting.
ans = model.predict(testData)

In [0]:
# Creating an empty Dataframe with column names only
dfObj = pd.DataFrame(columns=['people_ID', 'infect_prob'])

In [0]:
peopleIdArr = peopleId.to_numpy()

In [0]:
# Build the submission table in one step. DataFrame.append was removed in
# pandas 2.0, and appending row by row copied the whole frame on every iteration.
dfObj = pd.DataFrame({
  'people_ID': [int(person_id) for person_id in peopleIdArr],
  # each prediction row holds a single value; only as many rows as there are IDs are used
  'infect_prob': [row[0] for row in ans[:len(peopleIdArr)]],
})
dfObj.head()

In [0]:
# Cast to explicit dtypes so the CSV holds integer IDs and float probabilities.
dfObj2 = dfObj.astype({'people_ID': int,'infect_prob':float})
dfObj2.head()

In [0]:
dfObj2.to_csv(r'/content/prob1.csv', index=False) 

In [0]:
  # Trigger a browser download of the submission file (works only inside Colab).
  from google.colab import files
  files.download("/content/prob1.csv")