In [None]:
import pandas as pd
# Location of the raw earthquake catalogue.
# NOTE(review): this is an absolute, machine-specific path, so the cell only runs
# where that file exists — consider a path relative to a configurable data dir.
file_path = r'C:\Users\siddh\Desktop\DV_Final_Project\Significant_Earthquakes.csv'
df = pd.read_csv(file_path)

# Columns carried through the rest of the pipeline.
columns_needed = ['time', 'latitude', 'longitude', 'depth', 'mag', 'place', 'type']

# Build the cleaned frame in one pass, leaving the raw `df` untouched:
#   1. keep only the needed columns,
#   2. drop rows missing any core field ('place' and 'type' may stay empty),
#   3. parse 'time'; values that fail to parse become NaT and are dropped,
#   4. keep only magnitudes inside the closed interval [0, 10].
df_clean = (
    df[columns_needed]
    .dropna(subset=['time', 'latitude', 'longitude', 'depth', 'mag'])
    .assign(time=lambda frame: pd.to_datetime(frame['time'], errors='coerce'))
    .dropna(subset=['time'])
    .loc[lambda frame: frame['mag'].between(0, 10)]
)

# Categorize magnitude into three levels: Low, Strong, Destructive
def categorize_magnitude(mag):
    """Map a numeric magnitude to one of three level labels.

    Args:
        mag: Magnitude as a number.

    Returns:
        'Low' when ``mag < 4``, 'Strong' when ``4 <= mag < 7``,
        'Destructive' when ``mag >= 7``, and ``None`` when ``mag`` is NaN.
    """
    # NaN compares False against every threshold, so without this guard a
    # missing magnitude would fall through to the final 'Destructive' branch.
    if mag != mag:
        return None
    if mag < 4:
        return 'Low'
    # The lower bound is already implied here: anything below 4 returned above.
    if mag < 7:
        return 'Strong'
    return 'Destructive'

def _extract_country(place):
    """Return the stripped text after the last comma of ``place``.

    Falls back to 'Unknown' when ``place`` is null or contains no comma.
    """
    if pd.isnull(place) or ',' not in place:
        return 'Unknown'
    return place.rsplit(',', 1)[-1].strip()


# Magnitude level label per row, stored as a categorical column.
df_clean['Mag_Level'] = df_clean['mag'].apply(categorize_magnitude).astype('category')

# Calendar year and month taken from the parsed timestamp.
df_clean['Year'] = df_clean['time'].dt.year
df_clean['Month'] = df_clean['time'].dt.month

# Normalise the text columns: trimmed, title-cased place names and
# lower-cased event types (the latter stored as a categorical column).
df_clean['place'] = df_clean['place'].str.strip().str.title()
df_clean['type'] = df_clean['type'].str.lower().astype('category')

# Country is derived from the normalised place string.
df_clean['Country'] = df_clean['place'].apply(_extract_country)

# Drop rows that are exact duplicates across all columns.
df_clean = df_clean.drop_duplicates()

# Persist the cleaned dataset and report what was written.
df_clean.to_csv(r'C:\Users\siddh\Desktop\DV_Final_Project\Final_Earthquake_Cleaned_Dataset.csv', index=False)
print(f"✅ Preprocessing Complete! Final dataset shape: {df_clean.shape}")
print(df_clean.head())


✅ Preprocessing Complete! Final dataset shape: (102040, 11)
                               time  latitude  longitude  depth   mag  \
16 1904-04-04 10:26:00.880000+00:00    41.758     23.249   15.0  7.02   
17 1904-04-04 10:02:34.560000+00:00    41.802     23.108   15.0  6.84   
18 1904-06-25 21:00:38.720000+00:00    52.763    160.277   30.0  7.70   
19 1904-06-25 14:45:39.140000+00:00    51.424    161.638   15.0  7.50   
20 1904-08-30 11:43:20.850000+00:00    30.684    100.608   15.0  7.09   

                                             place        type    Mag_Level  \
16               7 Km Se Of Stara Kresna, Bulgaria  earthquake  Destructive   
17                6 Km W Of Stara Kresna, Bulgaria  earthquake       Strong   
18  115 Km Ese Of Petropavlovsk-Kamchatsky, Russia  earthquake  Destructive   
19   274 Km Se Of Petropavlovsk-Kamchatsky, Russia  earthquake  Destructive   
20                   150 Km Wnw Of Kangding, China  earthquake  Destructive   

    Year  Month   Country 