In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer # type: ignore
import tensorflow as tf # type: ignore
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import Dense # type: ignore
from tensorflow.keras.utils import to_categorical # type: ignore
import pandas as pd
from sklearn.metrics import classification_report

data = pd.read_csv('Combined_data.csv')

# Encode categorical variables using OneHotEncoder and scale continuous variables
categorical_features = ['member_casual', 'season', 'day_of_week']
continuous_features = ['Elevation_Change', 'Distance', 'trip_duration', 'TMAX', 'TMIN']

# Preprocessor that scales continuous features and encodes categorical features
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), continuous_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

# Split the data into features and target
X = data.drop(['rideable_type','ride_id', 'rideable_type', 'started_at', 'ended_at', 'start_station_name',
               'start_station_id', 'end_station_name', 'end_station_id'], axis=1)
y = data['rideable_type']

# Fit the preprocessor and transform the feature data
X_processed = preprocessor.fit_transform(X)

# Convert target variable to one-hot encoding
y_encoded = to_categorical(y.factorize()[0])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_processed, y_encoded, test_size=0.3, random_state=42)

# Neural network model
model = Sequential([
    Dense(64, input_shape=(X_train.shape[1],), activation='relu'),
    Dense(32, activation='relu'),
    Dense(y_encoded.shape[1], activation='softmax')  # Output layer with one neuron per class
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on the test data
evaluation = model.evaluate(X_test, y_test)

# Print accuracy
print('Test Accuracy:', evaluation[1])

# Classification report
y_pred = model.predict(X_test)
y_pred_classes = tf.argmax(y_pred, axis=1).numpy()
y_true_classes = tf.argmax(y_test, axis=1).numpy()

print(classification_report(y_true_classes, y_pred_classes))

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m199283/199283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 319us/step - accuracy: 0.5128 - loss: nan - val_accuracy: 0.5117 - val_loss: nan
Epoch 2/10
[1m199283/199283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 321us/step - accuracy: 0.5126 - loss: nan - val_accuracy: 0.5117 - val_loss: nan
Epoch 3/10
[1m199283/199283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 327us/step - accuracy: 0.5122 - loss: nan - val_accuracy: 0.5117 - val_loss: nan
Epoch 4/10
[1m199283/199283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 325us/step - accuracy: 0.5128 - loss: nan - val_accuracy: 0.5117 - val_loss: nan
Epoch 5/10
[1m199283/199283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 324us/step - accuracy: 0.5125 - loss: nan - val_accuracy: 0.5117 - val_loss: nan
Epoch 6/10
[1m199283/199283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 325us/step - accuracy: 0.5123 - loss: nan - val_accuracy: 0.5117 - val_loss: nan
Epoch 7/10
[1m199283

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.51      1.00      0.68   1750525
           1       0.00      0.00      0.00   1665742

    accuracy                           0.51   3416267
   macro avg       0.26      0.50      0.34   3416267
weighted avg       0.26      0.51      0.35   3416267



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
