In [1]:
!python --version

Python 3.13.5


# MySQL connection with jupyter notebook

### install mysql-connector-python

In [12]:
#mysql connection with python
!conda install -c anaconda mysql-connector-python -y

### Connection with mysql

In [None]:
try:
    import mysql.connector as sql
    conn = sql.connect(
        host='localhost', 
        user='root', 
        password='cap4770',
        database= 'cap4770',  
        use_pure = True)
    
    cursor = conn.cursor(buffered=True)
    cursor.execute("use cap4770")
    
    # Check what tables are available
    cursor.execute("show tables")
    tables = cursor.fetchall()
    print("Available tables in the database:")
    for table in tables:
        print(f"- {table[0]}")
    
except Exception as e:
    print(f"Database connection error: {e}")
    print("Will work with CSV file instead.")

<mysql.connector.connection.MySQLConnection at 0x32f5d0620>

In [58]:
# install numpy, pandas and matplotlib using pip at terminal

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Storm Events Data Analysis

Let's explore the storm events data from the CSV file and database.

In [3]:
# Read the storm events CSV file directly
storm_df = pd.read_csv('StormEvents_details-ftp_v1.0_d2011_c20250520.csv')

# Display basic information about the dataset
print("Dataset Shape:", storm_df.shape)
print("\nColumn Names:")
print(storm_df.columns.tolist())
print("\nFirst few rows:")
storm_df.head()

Dataset Shape: (79091, 51)

Column Names:
['BEGIN_YEARMONTH', 'BEGIN_DAY', 'BEGIN_TIME', 'END_YEARMONTH', 'END_DAY', 'END_TIME', 'EPISODE_ID', 'EVENT_ID', 'STATE', 'STATE_FIPS', 'YEAR', 'MONTH_NAME', 'EVENT_TYPE', 'CZ_TYPE', 'CZ_FIPS', 'CZ_NAME', 'WFO', 'BEGIN_DATE_TIME', 'CZ_TIMEZONE', 'END_DATE_TIME', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT', 'DEATHS_INDIRECT', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'SOURCE', 'MAGNITUDE', 'MAGNITUDE_TYPE', 'FLOOD_CAUSE', 'CATEGORY', 'TOR_F_SCALE', 'TOR_LENGTH', 'TOR_WIDTH', 'TOR_OTHER_WFO', 'TOR_OTHER_CZ_STATE', 'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_CZ_NAME', 'BEGIN_RANGE', 'BEGIN_AZIMUTH', 'BEGIN_LOCATION', 'END_RANGE', 'END_AZIMUTH', 'END_LOCATION', 'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON', 'EPISODE_NARRATIVE', 'EVENT_NARRATIVE', 'DATA_SOURCE']

First few rows:


Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,201102,27,300,201102,27,1500,49358,289076,NEW HAMPSHIRE,33,...,,,,,,,,A weak area of low pressure moved eastward out...,A total of 6 to 8 inches of snow fell across H...,CSV
1,201101,18,630,201101,18,1630,47655,277847,NEW HAMPSHIRE,33,...,,,,,,,,Area of low pressure developed and moved up th...,Between 6 to 8 inches of snow fell across East...,CSV
2,201101,21,400,201101,21,1200,47661,277878,NEW HAMPSHIRE,33,...,,,,,,,,An area of low pressure lifted northeast throu...,Between 5 to 7 inches of snow fell across East...,CSV
3,201101,27,100,201101,27,1000,47986,280010,NEW HAMPSHIRE,33,...,,,,,,,,A strong low pressure system moved up the coas...,Four to nine inches of snow fell across easter...,CSV
4,201101,12,200,201101,12,2200,48019,280156,NEW HAMPSHIRE,33,...,,,,,,,,A developing nor'easter coastal storm dumped u...,Fifteen to twenty inches of snow fell across e...,CSV


In [4]:
# Display data types and basic information
print("Data Types:")
print(storm_df.dtypes)
print("\nDataset Info:")
storm_df.info()

# Show summary statistics for numerical columns
print("\nSummary Statistics:")
storm_df.describe()

Data Types:
BEGIN_YEARMONTH         int64
BEGIN_DAY               int64
BEGIN_TIME              int64
END_YEARMONTH           int64
END_DAY                 int64
END_TIME                int64
EPISODE_ID              int64
EVENT_ID                int64
STATE                  object
STATE_FIPS              int64
YEAR                    int64
MONTH_NAME             object
EVENT_TYPE             object
CZ_TYPE                object
CZ_FIPS                 int64
CZ_NAME                object
WFO                    object
BEGIN_DATE_TIME        object
CZ_TIMEZONE            object
END_DATE_TIME          object
INJURIES_DIRECT         int64
INJURIES_INDIRECT       int64
DEATHS_DIRECT           int64
DEATHS_INDIRECT         int64
DAMAGE_PROPERTY        object
DAMAGE_CROPS           object
SOURCE                 object
MAGNITUDE             float64
MAGNITUDE_TYPE         object
FLOOD_CAUSE            object
CATEGORY              float64
TOR_F_SCALE            object
TOR_LENGTH            float6

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE_FIPS,YEAR,...,CATEGORY,TOR_LENGTH,TOR_WIDTH,TOR_OTHER_CZ_FIPS,BEGIN_RANGE,END_RANGE,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON
count,79091.0,79091.0,79091.0,79091.0,79091.0,79091.0,79091.0,79091.0,79091.0,79091.0,...,16.0,2074.0,2074.0,384.0,52207.0,52207.0,52207.0,52207.0,52207.0,52207.0
mean,201105.661921,14.76594,1266.181158,201105.661921,17.142848,1520.211579,52629.991693,314559.899179,31.565943,2011.0,...,1.0,4.021123,243.468645,95.304688,2.168713,2.205164,38.086579,-88.9207,38.084008,-88.90896
std,2.717539,9.383267,704.50108,2.717539,9.320915,625.867758,3615.151336,24495.063745,17.733711,0.0,...,0.0,5.519442,357.19034,68.654988,3.962235,4.013313,4.724869,10.22568,4.722845,10.221412
min,201101.0,1.0,0.0,201101.0,1.0,0.0,45914.0,266353.0,1.0,2011.0,...,1.0,0.01,1.0,1.0,0.0,0.0,-14.3466,-170.8868,-14.3893,-170.8292
25%,201104.0,6.0,702.0,201104.0,9.0,1200.0,49632.0,293771.5,19.0,2011.0,...,1.0,0.57,50.0,49.0,0.0,1.0,34.83,-95.6,34.83,-95.59085
50%,201106.0,15.0,1500.0,201106.0,19.0,1640.0,52264.0,314604.0,30.0,2011.0,...,1.0,2.0,100.0,89.0,1.0,1.0,38.252,-88.1932,38.2538,-88.18
75%,201107.0,23.0,1815.0,201107.0,25.0,1950.0,55588.0,335392.5,45.0,2011.0,...,1.0,4.9,250.0,125.5,3.0,3.0,41.32015,-81.62,41.32055,-81.60955
max,201112.0,31.0,2359.0,201112.0,31.0,2359.0,79994.0,588494.0,99.0,2011.0,...,1.0,41.79,2900.0,510.0,181.0,181.0,66.9051,144.8585,66.9114,144.759


In [5]:
# Check for missing values
print("Missing Values:")
missing_values = storm_df.isnull().sum()
print(missing_values[missing_values > 0])

# Display sample of data with better formatting
print("\nSample of the data:")
storm_df.head(10)

Missing Values:
DAMAGE_PROPERTY       13693
DAMAGE_CROPS          14069
MAGNITUDE             34364
MAGNITUDE_TYPE        51746
FLOOD_CAUSE           71939
CATEGORY              79075
TOR_F_SCALE           77017
TOR_LENGTH            77017
TOR_WIDTH             77017
TOR_OTHER_WFO         78707
TOR_OTHER_CZ_STATE    78707
TOR_OTHER_CZ_FIPS     78707
TOR_OTHER_CZ_NAME     78707
BEGIN_RANGE           26884
BEGIN_AZIMUTH         26884
BEGIN_LOCATION        26884
END_RANGE             26884
END_AZIMUTH           26884
END_LOCATION          26884
BEGIN_LAT             26884
BEGIN_LON             26884
END_LAT               26884
END_LON               26884
EVENT_NARRATIVE       24299
dtype: int64

Sample of the data:


Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,201102,27,300,201102,27,1500,49358,289076,NEW HAMPSHIRE,33,...,,,,,,,,A weak area of low pressure moved eastward out...,A total of 6 to 8 inches of snow fell across H...,CSV
1,201101,18,630,201101,18,1630,47655,277847,NEW HAMPSHIRE,33,...,,,,,,,,Area of low pressure developed and moved up th...,Between 6 to 8 inches of snow fell across East...,CSV
2,201101,21,400,201101,21,1200,47661,277878,NEW HAMPSHIRE,33,...,,,,,,,,An area of low pressure lifted northeast throu...,Between 5 to 7 inches of snow fell across East...,CSV
3,201101,27,100,201101,27,1000,47986,280010,NEW HAMPSHIRE,33,...,,,,,,,,A strong low pressure system moved up the coas...,Four to nine inches of snow fell across easter...,CSV
4,201101,12,200,201101,12,2200,48019,280156,NEW HAMPSHIRE,33,...,,,,,,,,A developing nor'easter coastal storm dumped u...,Fifteen to twenty inches of snow fell across e...,CSV
5,201110,29,1500,201110,30,800,57676,350515,NEW HAMPSHIRE,33,...,,,,,,,,A rare and historic October Nor'easter brought...,Nine to twenty-one inches of snow fell across ...,CSV
6,201108,28,853,201108,28,1453,57538,345212,NEW HAMPSHIRE,33,...,,,,,,,,Hurricane Irene formed east of the Caribbean i...,Trees and wires were downed in Pelham on Mount...,CSV
7,201110,30,120,201110,30,120,57676,352765,NEW HAMPSHIRE,33,...,,,,,,,,A rare and historic October Nor'easter brought...,"A tree was downed on Route 3 near exit 2, bloc...",CSV
8,201107,21,1600,201107,21,1800,55656,333708,NEW HAMPSHIRE,33,...,,,,,,,,A strong upper level ridge brought very hot te...,The Automated Weather Observation System at Bo...,CSV
9,201102,1,900,201102,2,1200,48483,283307,NEW HAMPSHIRE,33,...,,,,,,,,A series of low pressure centers impacted the ...,A total of 10-14 inches of snow fell across Hi...,CSV


In [None]:
# If the data has been uploaded to the database, query it from there
# First, let's reconnect to the database
try:
    import mysql.connector as sql
    conn = sql.connect(
        host='localhost', 
        user='root', 
        password='cap4770',
        database= 'cap4770',  
        use_pure = True)
    
    cursor = conn.cursor(buffered=True)
    cursor.execute("use cap4770")
    
    # Check what tables are available
    cursor.execute("show tables")
    tables = cursor.fetchall()
    print("Available tables in the database:")
    for table in tables:
        print(f"- {table[0]}")
    
except Exception as e:
    print(f"Database connection error: {e}")
    print("Will work with CSV file instead.")

Available tables in the database:
- stormevents_details


In [8]:
# Query storm events data from database (assuming table name is 'storm_events' or similar)
# You may need to adjust the table name based on how you uploaded the data

try:
    # Try common table names for storm events data
    possible_table_names = ['storm_events', 'stormevents', 'storm_data', 'StormEvents_details']
    
    storm_table_name = None
    for table_name in possible_table_names:
        try:
            cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
            count = cursor.fetchone()[0]
            storm_table_name = table_name
            print(f"Found storm events table: {table_name} with {count} records")
            break
        except:
            continue
    
    if storm_table_name:
        # Query the first 10 records from the database
        query = f"SELECT * FROM {storm_table_name} LIMIT 10"
        storm_df_db = pd.read_sql(query, conn)
        print(f"\nData from database table '{storm_table_name}':")
        print(f"Shape: {storm_df_db.shape}")
        storm_df_db.head()
    else:
        print("Storm events table not found in database. Please check the table name.")
        
except Exception as e:
    print(f"Error querying database: {e}")
    print("Working with CSV file data instead.")

Found storm events table: StormEvents_details with 10643 records

Data from database table 'StormEvents_details':
Shape: (10, 51)

Data from database table 'StormEvents_details':
Shape: (10, 51)


  storm_df_db = pd.read_sql(query, conn)
