# Welcome to Zohaib's Crime Data Zoo!

## Data Gathering & Pre-Processing!
### First, import modules.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


### Data Gathering.

In [None]:
# Read each city's CSV into its own DataFrame
# ('file1.csv' -> chicagoDF, 'file2.csv' -> losAngelesDF).
chicagoDF, losAngelesDF = (
    pd.read_csv(csv_path) for csv_path in ('file1.csv', 'file2.csv')
)

#### Understanding structure of each dataset.

In [None]:
# Preview, schema, and summary statistics for the Chicago data.
# head() and describe() only return DataFrames; a notebook cell echoes just
# its last expression (and a plain script echoes nothing), so print them
# explicitly to make sure every result is actually shown.
print(chicagoDF.head())
chicagoDF.info()  # info() prints its report itself and returns None
print(chicagoDF.describe())


In [None]:
# Preview, schema, and summary statistics for the Los Angeles data.
# head() and describe() only return DataFrames; a notebook cell echoes just
# its last expression (and a plain script echoes nothing), so print them
# explicitly to make sure every result is actually shown.
print(losAngelesDF.head())
losAngelesDF.info()  # info() prints its report itself and returns None
print(losAngelesDF.describe())


#### Data Preprocessing.

In [None]:
# Handling Missing Values
# dropna() with no arguments removes every row that has a missing value in
# any column; inplace=True mutates the loaded frames directly.
# NOTE(review): if some column is mostly empty this can discard most rows --
# confirm against the real data.
chicagoDF.dropna(inplace=True)
losAngelesDF.dropna(inplace=True)

# Scaling Numeric Features (if needed)
# Each city is modeled on its own, so each gets a scaler fitted on its own
# data. Re-using the Chicago-fitted scaler for Los Angeles would shift and
# scale the LA values by Chicago's mean/std and leave them unstandardized.
scaler = StandardScaler()
chicagoDF[['Longitude']] = scaler.fit_transform(chicagoDF[['Longitude']])
la_scaler = StandardScaler()
losAngelesDF[['Longitude']] = la_scaler.fit_transform(losAngelesDF[['Longitude']])


#### Do some ML.
##### Data Split.

In [None]:
# Separate the 'Longitude' target from the remaining columns for each city,
# then hold out 20% of the rows as a test set with a fixed seed.
# NOTE(review): every other column is kept as a feature; any non-numeric
# columns would presumably need encoding before fitting -- confirm schema.
target_column = 'Longitude'
split_options = {'test_size': 0.2, 'random_state': 42}

# Chicago
y1 = chicagoDF[target_column]
X1 = chicagoDF.drop(columns=[target_column])

# Los Angeles
y2 = losAngelesDF[target_column]
X2 = losAngelesDF.drop(columns=[target_column])

X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1, y1, **split_options
)
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2, y2, **split_options
)


##### Model Training

In [None]:
# Model for chi
model1 = RandomForestClassifier()
model1.fit(X1_train, y1_train)
y1_pred = model1.predict(X1_test)

# Model for la
model2 = RandomForestClassifier()
model2.fit(X2_train, y2_train)
y2_pred = model2.predict(X2_test)


##### Evaluation & CHARTS.

In [None]:
# Evaluation for data1
print("Model 1 Evaluation:")
print("Accuracy:", accuracy_score(y1_test, y1_pred))
print("Confusion Matrix:")
print(confusion_matrix(y1_test, y1_pred))
print("Classification Report:")
print(classification_report(y1_test, y1_pred))

# Evaluation for data2
print("\nModel 2 Evaluation:")
print("Accuracy:", accuracy_score(y2_test, y2_pred))
print("Confusion Matrix:")
print(confusion_matrix(y2_test, y2_pred))
print("Classification Report:")
print(classification_report(y2_test, y2_pred))


In [None]:
# Example of a histogram
# Overlay one semi-transparent 20-bin histogram per city on the same axes.
# NOTE(review): 'numeric_column' looks like a placeholder name -- confirm the
# column exists in both DataFrames before running.
for city_frame, city_label in (
    (chicagoDF, 'Chicago'),
    (losAngelesDF, 'Los Angeles'),
):
    plt.hist(city_frame['numeric_column'], bins=20, alpha=0.5, label=city_label)

plt.legend()
plt.title('Distribution of Numeric Column')
plt.xlabel('Numeric Column')
plt.ylabel('Frequency')
plt.show()