In [1]:
import pandas as pd # type: ignore
from sklearn.model_selection import train_test_split # type: ignore
from sklearn.preprocessing import StandardScaler, OneHotEncoder # type: ignore
from sklearn.compose import ColumnTransformer # type: ignore
from sklearn.pipeline import Pipeline # type: ignore
from sklearn.linear_model import LogisticRegression # type: ignore

In [2]:
data = pd.read_csv('Combined_data.csv')
data = data.dropna()
X = data.drop('rideable_type', axis=1)
y = data['rideable_type']

# Categorical and continuous features
categorical_features = ['member_casual', 'season', 'day_of_week']
continuous_features = ['Elevation_Change', 'Distance', 'trip_duration', 'TMAX', 'TMIN']

# Preprocessor for scaling and encoding
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), continuous_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

# Create a pipeline with logistic regression
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000))
])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the model
pipeline.fit(X_train, y_train)

# Access the trained Logistic Regression model
logreg = pipeline.named_steps['classifier']

# Get the coefficients of the logistic regression model
coefficients = logreg.coef_[0]

# Get feature names after OneHotEncoder and concatenate with continuous features
feature_names = pipeline.named_steps['preprocessor'].transformers_[1][1].get_feature_names_out(categorical_features)
all_feature_names = list(feature_names) + continuous_features

# Create a DataFrame to view the coefficients paired with feature names
coefficients_df = pd.DataFrame(coefficients, index=all_feature_names, columns=['Coefficient'])

print(coefficients_df)


                       Coefficient
member_casual_casual     -0.041679
member_casual_member      0.031800
season_Fall              -0.807996
season_Spring            -0.075316
season_Summer            -0.033264
season_Winter             0.035987
day_of_week_Friday       -0.339277
day_of_week_Monday       -0.034578
day_of_week_Saturday     -0.046600
day_of_week_Sunday       -0.047978
day_of_week_Thursday     -0.174134
day_of_week_Tuesday       0.000109
day_of_week_Wednesday    -0.041014
Elevation_Change         -0.150065
Distance                 -0.146041
trip_duration             0.022835
TMAX                     -0.011860
TMIN                      0.022744
