In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report
from sklearn.base import BaseEstimator
from sklearn.impute import SimpleImputer
import pandas as pd
import numpy as np
import sys
import os
import joblib

In [3]:
# Read the earthquake CSV into `df`; every later cell works from this frame.
# NOTE(review): the location is an absolute, machine-specific Windows path, so
# this cell only runs on the original author's machine as written.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\bunyo\onedrive\desktop\AI_Course\ModularProgramProjects\Second_week_project\data\scrapped_data\earthquake.csv",
)

In [4]:
# Print the column names, dtypes and non-null counts of the loaded frame so
# missing values can be spotted before any modelling is done.
df.info()

<class 'pandas.DataFrame'>
RangeIndex: 9088 entries, 0 to 9087
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          9088 non-null   str    
 1   Location      9088 non-null   str    
 2   Lat_degrees   9087 non-null   float64
 3   Long_degrees  9088 non-null   float64
 4   Depth         9088 non-null   int64  
 5   Type          9088 non-null   str    
 6   Magnituda     9009 non-null   float64
 7   Network       9088 non-null   str    
dtypes: float64(3), int64(1), str(4)
memory usage: 568.1 KB


In [5]:
# Display the first rows of the frame as a quick sanity check of the raw values.
df.head()

Unnamed: 0,Date,Location,Lat_degrees,Long_degrees,Depth,Type,Magnituda,Network
0,2026-02-22 06:58:41.0,MOLUCCA SEA,1.23,126.1,31,origin,3.0,BMKG*
1,2026-02-22 06:57:13.0,BANDA SEA,-6.65,127.48,419,origin,3.9,BMKG*
2,2026-02-22 06:49:01.9,WESTERN TURKEY,37.423,29.746,17,origin,1.6,KOERI
3,2026-02-22 06:43:54.5,NORTHERN PERU,-4.607,-76.353,128,origin,4.9,GFZ
4,2026-02-22 06:43:54.2,NORTHERN PERU,-4.527,-76.285,112,origin,5.1,SC4


In [8]:

# Build the classification target by bucketing the magnitude column into three
# risk classes. The label strings are runtime values (they end up in the fitted
# model's classes and in the printed report), so they are kept as-is:
#   'Xavfsiz' = safe, 'Xavfli' = dangerous, 'Juda xavfli' = very dangerous.
bins = [0, 3, 5, np.inf]
labels = ['Xavfsiz', 'Xavfli', 'Juda xavfli']
# include_lowest=True closes the first interval on the left ([0, 3]) so that a
# magnitude of exactly 0.0 is labelled 'Xavfsiz' instead of turning into NaN
# and being silently discarded by the dropna below.
df['Risk_Level'] = pd.cut(
    df['Magnituda'], bins=bins, labels=labels, include_lowest=True
)

# Rows without a label (missing magnitude, or a magnitude below the first bin
# edge) cannot be used for supervised training, so they are removed here.
df = df.dropna(subset=['Risk_Level'])
# 'Magnituda' is excluded from the features because the target is derived
# directly from it.
X = df.drop(columns=['Magnituda', 'Risk_Level'])
y = df['Risk_Level']

# Column handling:
#   - 'Type' is ordinal-encoded; categories not seen during fit map to -1.
#   - 'Date', 'Location' and 'Network' are dropped.
#   - every remaining column is passed through unchanged.
# NOTE(review): df.info() reports one missing 'Lat_degrees' value and nothing
# here imputes it, so this relies on the forest accepting NaN inputs - confirm
# for the installed scikit-learn version or add an imputation step.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat_encoding', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), ['Type']),
        ('drop_cols', 'drop', ['Date', 'Location', 'Network'])
    ],
    remainder='passthrough'
)

# NOTE(review): the final step is a classifier although it is named
# 'regressor'; the name is kept so that code which loads the saved pipeline and
# looks the step up by name keeps working.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestClassifier(n_estimators=99, random_state=77))
])

# Hold out 20% of the rows for evaluation; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=49)

pipeline.fit(X_train, y_train)

# Save the model.
path = r"C:\Users\bunyo\onedrive\desktop\AI_Course\ModularProgramProjects\Second_week_project\offline_testing\pipline_model"
model_name = "earthquake_model.joblib"
model_path = os.path.join(path, model_name)
# Create the target directory if it is missing; otherwise joblib.dump would
# fail with FileNotFoundError after the model has already been trained.
os.makedirs(path, exist_ok=True)
joblib.dump(pipeline, model_path)

# Evaluate on the held-out rows and print per-class precision/recall/F1.
y_pred = pipeline.predict(X_test)
print("\n Model Performance ")
print(classification_report(y_test, y_pred))


 Model Performance 
              precision    recall  f1-score   support

 Juda xavfli       0.84      0.73      0.78       181
      Xavfli       0.75      0.75      0.75       591
     Xavfsiz       0.88      0.90      0.89      1030

    accuracy                           0.83      1802
   macro avg       0.82      0.79      0.81      1802
weighted avg       0.83      0.83      0.83      1802

