In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import sqlalchemy
from sqlalchemy import create_engine, func, inspect, text

In [2]:
# Load the tornado records from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="tornados.csv")

# Preview the first ten rows as the cell's rich output.
df.head(n=10)

Unnamed: 0,om,yr,mo,dy,date,time,tz,datetime_utc,st,stf,...,elon,len,wid,ns,sn,f1,f2,f3,f4,fc
0,192,1950,10,1,10/1/1950,21:00:00,America/Chicago,1950-10-02T03:00:00Z,OK,40,...,-102.3,15.8,10,1,1,25,0,0,0,False
1,193,1950,10,9,10/9/1950,2:15:00,America/Chicago,1950-10-09T08:15:00Z,NC,37,...,0.0,2.0,880,1,1,47,0,0,0,False
2,195,1950,11,20,11/20/1950,2:20:00,America/Chicago,1950-11-20T08:20:00Z,KY,21,...,0.0,0.1,10,1,1,177,0,0,0,False
3,196,1950,11,20,11/20/1950,4:00:00,America/Chicago,1950-11-20T10:00:00Z,KY,21,...,0.0,0.1,10,1,1,209,0,0,0,False
4,197,1950,11,20,11/20/1950,7:30:00,America/Chicago,1950-11-20T13:30:00Z,MS,28,...,0.0,2.0,37,1,1,101,0,0,0,False
5,194,1950,11,4,11/4/1950,17:00:00,America/Chicago,1950-11-04T23:00:00Z,PA,42,...,-75.93,15.9,100,1,1,71,11,0,0,False
6,198,1950,12,2,12/2/1950,15:00:00,America/Chicago,1950-12-02T21:00:00Z,IL,17,...,-89.72,18.8,50,1,1,119,117,0,0,False
7,199,1950,12,2,12/2/1950,16:00:00,America/Chicago,1950-12-02T22:00:00Z,IL,17,...,-89.38,18.0,200,1,1,119,5,0,0,False
8,200,1950,12,2,12/2/1950,16:25:00,America/Chicago,1950-12-02T22:25:00Z,AR,5,...,-91.72,7.8,10,1,1,65,0,0,0,False
9,201,1950,12,2,12/2/1950,17:30:00,America/Chicago,1950-12-02T23:30:00Z,IL,17,...,-89.62,9.6,50,1,1,157,0,0,0,False


In [3]:
# SQLAlchemy engine pointing at the SQLite file "tornados.sqlite".
engine = create_engine("sqlite:///tornados.sqlite")

# Copy the DataFrame into a table named "tornados". if_exists="replace" makes
# the cell safe to re-run (the table is rebuilt each time) and index=False
# keeps the pandas RangeIndex out of the table. The call's return value is
# displayed as the cell output.
df.to_sql(name="tornados", con=engine, index=False, if_exists="replace")

68693

In [4]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 68693 entries, 0 to 68692
Data columns (total 27 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   om            68693 non-null  int64  
 1   yr            68693 non-null  int64  
 2   mo            68693 non-null  int64  
 3   dy            68693 non-null  int64  
 4   date          68693 non-null  object 
 5   time          68693 non-null  object 
 6   tz            68693 non-null  object 
 7   datetime_utc  68693 non-null  object 
 8   st            68693 non-null  object 
 9   stf           68693 non-null  int64  
 10  mag           67937 non-null  float64
 11  inj           68693 non-null  int64  
 12  fat           68693 non-null  int64  
 13  loss          41523 non-null  float64
 14  slat          68693 non-null  float64
 15  slon          68693 non-null  float64
 16  elat          68693 non-null  float64
 17  elon          68693 non-null  float64
 18  len           68693 non-nu

In [5]:
# Explore the database schema that to_sql produced.
# The inspector reflects metadata (tables, columns, types) through the engine.
inspector = inspect(engine)

# Names of every table currently in the database.
tables = inspector.get_table_names()

# For each table, print its name followed by one "<column> <type>" line per
# column, then a blank line as a separator between tables.
for table_name in tables:
    print(table_name)
    for col in inspector.get_columns(table_name):
        print(col["name"], col["type"])

    print()

tornados
om BIGINT
yr BIGINT
mo BIGINT
dy BIGINT
date TEXT
time TEXT
tz TEXT
datetime_utc TEXT
st TEXT
stf BIGINT
mag FLOAT
inj BIGINT
fat BIGINT
loss FLOAT
slat FLOAT
slon FLOAT
elat FLOAT
elon FLOAT
len FLOAT
wid BIGINT
ns BIGINT
sn BIGINT
f1 BIGINT
f2 BIGINT
f3 BIGINT
f4 BIGINT
fc BOOLEAN



In [6]:
# Timezone to filter on; the sentinel value "All" disables the filter.
tz = "America/Chicago"

# "1=1" is an always-true predicate, so the WHERE clause stays syntactically
# valid when no timezone filter is wanted.
# NOTE(review): tz is interpolated directly into the SQL text; if this value
# ever comes from real user input it should be passed as a bound parameter.
where_clause = "1=1" if tz == "All" else f"tz = '{tz}'"

query = f"""
        SELECT
            *
        FROM
            tornados
        WHERE
            {where_clause};
"""

# Show the final SQL so the generated filter can be checked by eye.
print(query)


        SELECT
            *
        FROM
            tornados
        WHERE
            tz = 'America/Chicago';



In [7]:
# Execute the SQL string built in the previous cell and load every matching
# row into a DataFrame.
# NOTE(review): in the preview the `fc` column shows 0 where the CSV preview
# showed False, so the boolean dtype does not appear to survive the round
# trip through SQLite; confirm before relying on it.
df_map = pd.read_sql(sql=text(query), con=engine)

# Preview the first five rows.
df_map.head(n=5)

Unnamed: 0,om,yr,mo,dy,date,time,tz,datetime_utc,st,stf,...,elon,len,wid,ns,sn,f1,f2,f3,f4,fc
0,192,1950,10,1,10/1/1950,21:00:00,America/Chicago,1950-10-02T03:00:00Z,OK,40,...,-102.3,15.8,10,1,1,25,0,0,0,0
1,193,1950,10,9,10/9/1950,2:15:00,America/Chicago,1950-10-09T08:15:00Z,NC,37,...,0.0,2.0,880,1,1,47,0,0,0,0
2,195,1950,11,20,11/20/1950,2:20:00,America/Chicago,1950-11-20T08:20:00Z,KY,21,...,0.0,0.1,10,1,1,177,0,0,0,0
3,196,1950,11,20,11/20/1950,4:00:00,America/Chicago,1950-11-20T10:00:00Z,KY,21,...,0.0,0.1,10,1,1,209,0,0,0,0
4,197,1950,11,20,11/20/1950,7:30:00,America/Chicago,1950-11-20T13:30:00Z,MS,28,...,0.0,2.0,37,1,1,101,0,0,0,0


In [8]:
# Top-10 states by tornado count for the selected timezone.
# `tz` is set in an earlier cell; the sentinel "All" disables the filter.
if tz == "All":
    # Always-true predicate keeps the WHERE clause valid with no filter.
    where_clause = "1=1"
    params = {}
else:
    # Bind the value instead of interpolating it into the SQL text, so a
    # user-supplied timezone containing quotes cannot alter the statement.
    where_clause = "tz = :tz"
    params = {"tz": tz}

query = f"""
    SELECT
        st,
        tz,
        count(*) as num_tornados
    FROM
        tornados
    WHERE
        {where_clause}
    GROUP BY
        tz,
        st
    ORDER BY
        num_tornados desc
    LIMIT 10;
"""

# The named :tz placeholder (when present) is filled from `params` by the driver.
df_bar = pd.read_sql(text(query), con=engine, params=params)
df_bar.head(10)

Unnamed: 0,st,tz,num_tornados
0,TX,America/Chicago,9265
1,KS,America/Chicago,4429
2,OK,America/Chicago,4144
3,FL,America/Chicago,3566
4,NE,America/Chicago,2993
5,IA,America/Chicago,2815
6,IL,America/Chicago,2716
7,MS,America/Chicago,2594
8,AL,America/Chicago,2456
9,MO,America/Chicago,2441


In [9]:
# Timezone to filter on; the sentinel value "All" disables the filter.
tz = "America/Denver"

# "1=1" is an always-true predicate, so the WHERE clause stays syntactically
# valid when no timezone filter is wanted.
# NOTE(review): tz is interpolated directly into the SQL text; if this value
# ever comes from real user input it should be passed as a bound parameter.
where_clause = "1=1" if tz == "All" else f"tz = '{tz}'"

query = f"""
        SELECT
            *
        FROM
            tornados
        WHERE
            {where_clause};
"""

# Show the final SQL so the generated filter can be checked by eye.
print(query)


        SELECT
            *
        FROM
            tornados
        WHERE
            tz = 'America/Denver';



In [10]:
# Execute the SQL string built in the previous cell and load every matching
# row into a DataFrame (this rebinds df_map from the earlier timezone run).
df_map = pd.read_sql(sql=text(query), con=engine)

# Preview the first five rows.
df_map.head(n=5)

Unnamed: 0,om,yr,mo,dy,date,time,tz,datetime_utc,st,stf,...,elon,len,wid,ns,sn,f1,f2,f3,f4,fc
0,216,1965,5,5,5/5/1965,14:45:00,America/Denver,1965-05-05T20:45:00Z,SD,46,...,-96.38,34.7,10,1,1,11,0,0,0,0
1,501,1986,7,1,7/1/1986,22:15:00,America/Denver,1986-07-02T04:15:00Z,NM,35,...,0.0,0.2,10,1,1,25,0,0,0,0
2,656,1986,9,4,9/4/1986,18:55:00,America/Denver,1986-09-05T00:55:00Z,NM,35,...,0.0,0.1,10,1,1,5,0,0,0,0
3,419,1990,5,24,5/24/1990,15:00:00,America/Denver,1990-05-24T21:00:00Z,MT,30,...,0.0,0.2,10,1,1,9,0,0,0,0
4,422,1990,5,24,5/24/1990,16:00:00,America/Denver,1990-05-24T22:00:00Z,MT,30,...,0.0,0.2,10,1,1,111,0,0,0,0


In [11]:
# Top-10 states by tornado count for the selected timezone.
# `tz` is set in an earlier cell; the sentinel "All" disables the filter.
if tz == "All":
    # Always-true predicate keeps the WHERE clause valid with no filter.
    where_clause = "1=1"
    params = {}
else:
    # Bind the value instead of interpolating it into the SQL text, so a
    # user-supplied timezone containing quotes cannot alter the statement.
    where_clause = "tz = :tz"
    params = {"tz": tz}

query = f"""
    SELECT
        st,
        tz,
        count(*) as num_tornados
    FROM
        tornados
    WHERE
        {where_clause}
    GROUP BY
        tz,
        st
    ORDER BY
        num_tornados desc
    LIMIT 10;
"""

# The named :tz placeholder (when present) is filled from `params` by the driver.
df_bar = pd.read_sql(text(query), con=engine, params=params)

# Show all (up to) ten rows the query returns; a bare head() would stop at five.
df_bar.head(10)

Unnamed: 0,st,tz,num_tornados
0,SD,America/Denver,19
1,MT,America/Denver,6
2,NM,America/Denver,2
3,WY,America/Denver,1
