In [73]:
import pandas as pd

df = pd.read_csv('1950-2019_all_tornadoes.csv')
df.head()
# Transformation 1 - Creates DataFrame and drops columns that are irrelevant for the intended purpose.
df_coldrop = df.drop(columns=['om', 'stf', 'stn', 'tz', 'ns', 'sn', 'sg', 'yr', 'mo', 'dy', 'f1','f2','f3','f4','fc'])
# Transformation 2 - Creates a column for datetime by combining 'date' and 'time' columns. Defines format for time, and drops the original columns.
df_coldrop['datetime'] = pd.to_datetime(df_coldrop['date'] + ' ' + df_coldrop['time'], format='%Y-%m-%d %H:%M:%S')
df_coldrop.drop(columns=['date', 'time'], inplace=True)
# Transformation 3 - Renames abbreviated columns for uniformity.
df_coldrop.rename(columns={
    'st' : 'state',
    'inj' : 'injuries',
    'fat' : 'fatalities',
    'len' : 'length',
    'wid' : 'width',
    'mag' : 'F-Scale'
}, inplace=True)
# Transformation 4 - Combines lat and lon into a single column as tuples and drops the original columns
df_coldrop.loc[:, 'start_coords'] = df_coldrop.apply(lambda row: (row['slat'], row['slon']), axis=1)
df_coldrop.loc[:, 'end_coords'] = df_coldrop.apply(lambda row: (row['elat'], row['elon']), axis=1)
df_coldrop.drop(columns=['slat', 'slon', 'elat', 'elon'], inplace=True)
# Transformation 5 - Changes the order of columns to read more fluently.
ordered = ['state', 'start_coords', 'end_coords', 'datetime', 'length', 'width', 'F-Scale', 'injuries', 'fatalities']
df_coldrop = df_coldrop[ordered]
print(df_coldrop)


      state         start_coords           end_coords            datetime  \
0        MO      (38.77, -90.22)      (38.83, -90.03) 1950-01-03 11:00:00   
1        MO      (38.77, -90.22)      (38.82, -90.12) 1950-01-03 11:00:00   
2        IL      (38.82, -90.12)      (38.83, -90.03) 1950-01-03 11:10:00   
3        IL        (39.1, -89.3)      (39.12, -89.23) 1950-01-03 11:55:00   
4        OH      (40.88, -84.58)           (0.0, 0.0) 1950-01-03 16:00:00   
...     ...                  ...                  ...                 ...   
66383    MS  (33.1628, -89.4323)  (33.2339, -89.3298) 2019-12-29 16:03:00   
66384    MS  (33.2598, -89.2778)  (33.2879, -89.2208) 2019-12-29 16:13:00   
66385    MS   (33.472, -89.0315)   (33.4888, -88.991) 2019-12-29 16:32:00   
66386    MS  (32.5268, -89.1628)  (32.5581, -89.1215) 2019-12-29 17:13:00   
66387    AL  (34.7541, -87.0777)  (34.7946, -87.0041) 2019-12-29 18:50:00   

       length  width  F-Scale  injuries  fatalities  
0        9.50    150 

In [75]:
import requests
import pandas as pd

url = 'https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/historical-tornado-tracks/records?refine=datetime%3A%222015%22'
response = requests.get(url)

if response.status_code == 200:
    data = response.json()
    
    results = data['results']
    df_api = pd.json_normalize(results)
    print(df_api.columns)    
# Transformation 1 - Creates DataFrame and drops columns that are irrelevant for the intended purpose.
    columns_to_drop = [
        'objectid', 'om', 'yr', 'mo', 'dy', 'tz', 'stf', 'stn', 'loss', 'closs', 'fc', 
        'pre_1996_loss', 'globalid', 'creationdate', 'creator', 'editdate', 'editor', 'shape_length',
        'geo_point_2d.lon', 'geo_point_2d.lat', 'geo_shape.type', 'geo_shape.geometry.coordinates', 
        'geo_shape.geometry.type', 'dt'
    ]
    df_api_coldrop = df_api.drop(columns=columns_to_drop)
    
# Transformation 2 - Creates a column for datetime by combining 'date' and 'time' columns. Defines format for time, and drops the original columns.
    df_api_coldrop['datetime'] = pd.to_datetime(df_api_coldrop['date'] + ' ' + df_api_coldrop['time'], format='%Y-%m-%d %H:%M:%S')
    df_api_coldrop.drop(columns=['date', 'time'], inplace=True)
# Transformation 3 - Renames abbreviated columns for uniformity.
    df_api_coldrop.rename(columns={
        'st' : 'state',
        'inj' : 'injuries',
        'fat' : 'fatalities',
        'len' : 'length',
        'wid' : 'width',
        'mag' : 'F-Scale'
}, inplace=True)
# Transformation 4 - Combines lat and lon into a single column as tuples and drops the original columns
    df_api_coldrop.loc[:, 'start_coords'] = df_api_coldrop.apply(lambda row: (row['slat'], row['slon']), axis=1)
    df_api_coldrop.loc[:, 'end_coords'] = df_api_coldrop.apply(lambda row: (row['elat'], row['elon']), axis=1)
    df_api_coldrop.drop(columns=['slat', 'slon', 'elat', 'elon'], inplace=True)
# Transformation 5 - Changes the order of columns to read more fluently.
    ordered = ['state', 'start_coords', 'end_coords', 'datetime', 'length', 'width', 'F-Scale',
               'injuries', 'fatalities']
    df_api_coldrop = df_api_coldrop[ordered]
    print(df_api_coldrop.head())
else:
    print(f'Error: {response.status_code}')


Index(['objectid', 'om', 'yr', 'mo', 'dy', 'date', 'time', 'tz', 'st', 'stf',
       'stn', 'mag', 'inj', 'fat', 'loss', 'closs', 'slat', 'slon', 'elat',
       'elon', 'len', 'wid', 'fc', 'pre_1996_loss', 'globalid', 'creationdate',
       'creator', 'editdate', 'editor', 'shape_length', 'dt', 'datetime',
       'geo_point_2d.lon', 'geo_point_2d.lat', 'geo_shape.type',
       'geo_shape.geometry.coordinates', 'geo_shape.geometry.type'],
      dtype='object')
  state     start_coords       end_coords            datetime  length  width  \
0    GA  (32.17, -83.36)  (32.19, -83.32) 2015-01-04 10:44:00    2.18    100   
1    FL  (26.39, -80.14)  (26.39, -80.13) 2015-02-05 06:37:00    1.10     50   
2    OK  (36.17, -95.88)  (36.15, -95.85) 2015-03-25 17:01:00    2.30    600   
3    FL  (30.49, -85.95)  (30.49, -85.95) 2015-04-19 07:25:00    0.30     25   
4    KS  (38.35, -101.2)  (38.3, -101.12) 2015-04-24 16:09:00    5.32     75   

   F-Scale  injuries  fatalities  
0        0         0

In [12]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Fetch and Parse HTML
url = 'https://data.usatoday.com/tornado-archive/'
response = requests.get(url)

if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Using table_id, searches for the called on table
    table = soup.find('table', {'id': 'tornadoSummary'})
    
    if table:
        # Extracts Table Data
        rows = table.find_all('tr')
        
        data = []
        for row in rows:
            cols = row.find_all('td')
            cols = [ele.text.strip() for ele in cols if ele.text.strip()]
            if cols:
                data.append(cols)
        
        # Adjusts columns based on actual data structure
        columns = ['Year', 'Number of Tornadoes', 'Direct Injury', 'Indirect Injury', 'Direct Fatality', 'Indirect Fatality', 'Property Damage', 'Crop Damage']

        # Transformation 1 - Creates DataFrame from table data.
        html_df = pd.DataFrame(data, columns=columns)

        # Transformation 2 - Merges direct and indirect Columns
        html_df['injuries'] = pd.to_numeric(html_df['Direct Injury'], errors='coerce') + pd.to_numeric(html_df['Indirect Injury'], errors='coerce')
        html_df['fatalities'] = pd.to_numeric(html_df['Direct Fatality'], errors='coerce') + pd.to_numeric(html_df['Indirect Fatality'], errors='coerce')

        # Transformation 3 - drops unnecessary columns.  
        html_df.drop(columns=['Direct Injury', 'Indirect Injury', 'Direct Fatality', 'Indirect Fatality'], inplace=True)

        # Transformation 4 - Cleans and converts Damage Columns
        html_df['Property Damage'] = html_df['Property Damage'].str.replace(',', '').str.replace('$', '') 
        html_df['Crop Damage'] = html_df['Crop Damage'].str.replace(',', '').str.replace('$', '')

        # Transformation 5 - Converts datatypes to Numeric
        html_df['Property Damage'] = pd.to_numeric(html_df['Property Damage'], errors='coerce')
        html_df['Crop Damage'] = pd.to_numeric(html_df['Crop Damage'], errors='coerce')

        print(html_df.head())
    else:
        print("Failed to locate the table with id 'tornadoSummary' on the webpage.")
else:
    print(f"Failed to retrieve the webpage. Status code: {response.status_code}")


   Year Number of Tornadoes  Property Damage  Crop Damage  injuries  \
0   All              78,269      72913636490  524295260.0       NaN   
1  2024               1,664        935078800   13326200.0     544.0   
2  2023               1,523       1371376500    7300800.0     955.0   
3  2022               1,384        698683090    5550500.0     318.0   
4  2021               1,545        232623000    2238400.0     881.0   

   fatalities  
0         NaN  
1        45.0  
2        91.0  
3        25.0  
4       107.0  
