In [3]:
import pandas as pd

# Read the CSV file
df = pd.read_csv('dispatch.csv')

# Get the column numbers (indices) you want to keep
column_numbers_to_keep = [1, 2, 3, 8, 12, 13, 19, 20, 21]

# Keep the specified columns
df = df.iloc[:, column_numbers_to_keep]

# Drop rows with any blank data
df.dropna(inplace=True)

# Count the number of rows after removing blank data
num_rows_after = len(df)

# Remove duplicate rows
df = df.drop_duplicates()

# Display summary statistics of the dataset
#print(df.describe())

# Display information about the dataset, including column names and data types
#print(df.info())



In [4]:
from sklearn.preprocessing import LabelEncoder

# Identify non-numerical columns
non_numerical_columns = df.select_dtypes(exclude=['number']).columns.tolist()

# Perform label encoding on non-numerical columns
label_encoder = LabelEncoder()
for column in non_numerical_columns:
    df[column] = label_encoder.fit_transform(df[column])


In [5]:

from sklearn.feature_selection import SelectKBest, mutual_info_classif

# Separate the features (X) and the target variable (y)
X = df.drop('INCIDENT_RESPONSE_SECONDS_QY', axis=1)
y = df['INCIDENT_RESPONSE_SECONDS_QY']

# Perform feature selection using mutual information
selector = SelectKBest(score_func=mutual_info_classif, k='all')
X_selected = selector.fit_transform(X, y)

# Get the selected feature names and their corresponding scores
selected_features_scores = pd.DataFrame({'Feature': X.columns, 'Score': selector.scores_})

# Sort the features based on their scores in descending order
sorted_features = selected_features_scores.sort_values(by='Score', ascending=False)

# Print the selected feature names and scores in descending order
print("Selected features in descending order of importance:")
print(sorted_features)

Selected features in descending order of importance:
                         Feature     Score
4  INCIDENT_TRAVEL_TM_SECONDS_QY  2.345162
3   DISPATCH_RESPONSE_SECONDS_QY  0.178360
1              INITIAL_CALL_TYPE  0.068437
2    INITIAL_SEVERITY_LEVEL_CODE  0.062455
7                        ZIPCODE  0.024034
5                        BOROUGH  0.014770
6         INCIDENT_DISPATCH_AREA  0.009990
0              INCIDENT_DATETIME  0.008897


In [6]:
# Get the column numbers (indices) you want to keep (Score > 0.05)
column_numbers_to_keep = [4, 3, 2, 1]

# Keep the specified columns
df = df.iloc[:, column_numbers_to_keep]

In [7]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=32)

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Initialize and train the models
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)


# Make predictions on the test set
linear_pred = linear_model.predict(X_test)


# Evaluate the models
linear_rmse = mean_squared_error(y_test, linear_pred, squared=False)


# Print the RMSE scores
print("Linear Regression RMSE:", linear_rmse)



Linear Regression RMSE: 0.3333837948036266


In [8]:

from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Initialize and train the models
tree_model = DecisionTreeRegressor()
tree_model.fit(X_train, y_train)



# Make predictions on the test set
tree_pred = tree_model.predict(X_test)


# Evaluate the models
tree_rmse = mean_squared_error(y_test, tree_pred, squared=False)


# Print the RMSE scores
print("Decision Tree RMSE:", tree_rmse)



Decision Tree RMSE: 20.72297498922306


In [9]:

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Initialize and train the models
rf_model = RandomForestRegressor()
rf_model.fit(X_train, y_train)


# Make predictions on the test set
rf_pred = rf_model.predict(X_test)

# Evaluate the models
rf_rmse = mean_squared_error(y_test, rf_pred, squared=False)


# Print the RMSE scores
print("Random Forest RMSE:", rf_rmse)


Random Forest RMSE: 16.138885020955286


In [10]:

from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Initialize and train the models
svr_model = SVR()
svr_model.fit(X_train, y_train)



# Make predictions on the test set
svr_pred = svr_model.predict(X_test)

# Evaluate the models
svr_rmse = mean_squared_error(y_test, svr_pred, squared=False)

# Print the RMSE scores
print("SVR RMSE:", svr_rmse)


In [None]:

from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

# Initialize and train the models
mlp_model = MLPRegressor()
mlp_model.fit(X_train, y_train)

# Make predictions on the test set
mlp_pred = mlp_model.predict(X_test)

# Evaluate the models
mlp_rmse = mean_squared_error(y_test, mlp_pred, squared=False)

# Print the RMSE scores
print("MLP RMSE:", mlp_rmse)


In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

# Define a list of regression models
models = [
    LinearRegression(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    SVR(),
    MLPRegressor()
]

# Train and evaluate each model
for model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    print(f"{type(model).__name__} RMSE: {rmse}")


LinearRegression RMSE: 0.3333837948036266
DecisionTreeRegressor RMSE: 21.94298567652289
RandomForestRegressor RMSE: 16.49298319347959


In [None]:
#import pandas as pd

# Read the CSV file
#df = pd.read_csv('o.csv')

# Remove rows with 'AM' or 'PM' in the 'INCIDENT_DATETIME' column
#column_name = 'INCIDENT_DATETIME'
#rows_to_exclude = ['AM', 'PM']
#df_filtered = df[~df[column_name].str.contains('|'.join(rows_to_exclude), na=False)]

# Create a new CSV file with the filtered data
#output_file = 'dispatch.csv'
#df_filtered.to_csv(output_file, index=False)

#print("Filtered data saved to", output_file)
