In [30]:
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

## Preprocessing

### Viewing Data

In [2]:
#  Import and read the charity_data.csv.
import pandas as pd 
data_df = pd.read_csv("processeddata.csv")
data_df.head()

Unnamed: 0,PatientId,AppointmentID,Gender,ScheduledDay,AppointmentDay,Age,Neighbourhood,Scholarship,Hipertension,Diabetes,Alcoholism,Handcap,SMS_received,No-show,WeekDay,TimeDelta
0,29872500000000.0,5642903,1,2016-04-29,2016-04-29,62,JARDIM DA PENHA,0,1,0,0,0,0,1,4,0
1,558997800000000.0,5642503,0,2016-04-29,2016-04-29,56,JARDIM DA PENHA,0,0,0,0,0,0,1,4,0
2,4262962000000.0,5642549,1,2016-04-29,2016-04-29,62,MATA DA PRAIA,0,0,0,0,0,0,1,4,0
3,867951200000.0,5642828,1,2016-04-29,2016-04-29,8,PONTAL DE CAMBURI,0,0,0,0,0,0,1,4,0
4,8841186000000.0,5642494,1,2016-04-29,2016-04-29,56,JARDIM DA PENHA,0,1,1,0,0,0,1,4,0


In [3]:
data_df.dtypes

PatientId         float64
AppointmentID       int64
Gender              int64
ScheduledDay       object
AppointmentDay     object
Age                 int64
Neighbourhood      object
Scholarship         int64
Hipertension        int64
Diabetes            int64
Alcoholism          int64
Handcap             int64
SMS_received        int64
No-show             int64
WeekDay             int64
TimeDelta           int64
dtype: object

### Datatype Changing

In [4]:
data_df['PatientId'] = data_df['PatientId'].astype('object')
data_df.dtypes

PatientId         object
AppointmentID      int64
Gender             int64
ScheduledDay      object
AppointmentDay    object
Age                int64
Neighbourhood     object
Scholarship        int64
Hipertension       int64
Diabetes           int64
Alcoholism         int64
Handcap            int64
SMS_received       int64
No-show            int64
WeekDay            int64
TimeDelta          int64
dtype: object

In [5]:
data_df.head()

Unnamed: 0,PatientId,AppointmentID,Gender,ScheduledDay,AppointmentDay,Age,Neighbourhood,Scholarship,Hipertension,Diabetes,Alcoholism,Handcap,SMS_received,No-show,WeekDay,TimeDelta
0,29872500000000.0,5642903,1,2016-04-29,2016-04-29,62,JARDIM DA PENHA,0,1,0,0,0,0,1,4,0
1,558998000000000.0,5642503,0,2016-04-29,2016-04-29,56,JARDIM DA PENHA,0,0,0,0,0,0,1,4,0
2,4262960000000.0,5642549,1,2016-04-29,2016-04-29,62,MATA DA PRAIA,0,0,0,0,0,0,1,4,0
3,867951000000.0,5642828,1,2016-04-29,2016-04-29,8,PONTAL DE CAMBURI,0,0,0,0,0,0,1,4,0
4,8841190000000.0,5642494,1,2016-04-29,2016-04-29,56,JARDIM DA PENHA,0,1,1,0,0,0,1,4,0


### Dropping Columns

In [6]:
data_df_test = data_df.drop(['PatientId', 'AppointmentID', 'ScheduledDay', 'AppointmentDay', 'Neighbourhood', 'TimeDelta'],axis=1)
data_df_test.head()

Unnamed: 0,Gender,Age,Scholarship,Hipertension,Diabetes,Alcoholism,Handcap,SMS_received,No-show,WeekDay
0,1,62,0,1,0,0,0,0,1,4
1,0,56,0,0,0,0,0,0,1,4
2,1,62,0,0,0,0,0,0,1,4
3,1,8,0,0,0,0,0,0,1,4
4,1,56,0,1,1,0,0,0,1,4


## Model Set up

### Data Split

In [7]:
X = data_df_test.drop(['No-show'], axis = 1)
y = data_df_test["No-show"]

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

### Logistic Regression

In [9]:
classifier = LogisticRegression(solver='lbfgs',
   max_iter=200,
   random_state=1)
classifier.fit(X_train, y_train)
y_pred_log_reg = classifier.predict(X_test)
print(f"Logistic regression accuracy: {accuracy_score(y_test,y_pred_log_reg)}")

### Random Forest Regression

In [27]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train, y_train)

# Evaluate the model
y_pref_rf = rf_model.predict(X_test)

print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred_rf)}")

### Linear Regression

In [34]:
# Create a random forest classifier.
reg = LinearRegression().fit(X, y)

# Fitting the model
reg = reg.fit(X_train, y_train)

# Evaluate the model
y_pred_reg = reg.predict(X_test)

print(f" Linear Regression predictive accuracy: {accuracy_score(y_test,y_pred_reg)}")

### Neural Network

In [19]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
#  YOUR CODE GOES HERE

nn = tf.keras.models.Sequential()

number_input_features = len(X_train['Gender'])
hidden_nodes_layer1 =  80
hidden_nodes_layer2 = 30

# First hidden layer
#  YOUR CODE GOES HERE
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))
# Second hidden layer
#  YOUR CODE GOES HERE
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))
# Output layer
#  YOUR CODE GOES HERE
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
# Check the structure of the model
nn.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 80)                5924320   
_________________________________________________________________
dense_2 (Dense)              (None, 30)                2430      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 31        
Total params: 5,926,781
Trainable params: 5,926,781
Non-trainable params: 0
_________________________________________________________________


In [20]:
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [21]:
fit_model = nn.fit(X_train,y_train)

ValueError: in user code:

    C:\Users\Supriya Singh\Anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function  *
        return step_function(self, iterator)
    C:\Users\Supriya Singh\Anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\keras\engine\training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\Supriya Singh\Anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\Supriya Singh\Anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\Supriya Singh\Anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\Supriya Singh\Anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\keras\engine\training.py:788 run_step  **
        outputs = model.train_step(data)
    C:\Users\Supriya Singh\Anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\keras\engine\training.py:754 train_step
        y_pred = self(x, training=True)
    C:\Users\Supriya Singh\Anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    C:\Users\Supriya Singh\Anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:259 assert_input_compatibility
        ' but received input with shape ' + display_shape(x.shape))

    ValueError: Input 0 of layer sequential_2 is incompatible with the layer: expected axis -1 of input shape to have value 74053 but received input with shape (None, 9)


In [None]:
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")