In [None]:
#imports
import pandas as pd

In [None]:
# Re-open the MySQL connection that the later cells rely on (`conn`, `cursor`)
# and list the tables visible in the `cap4770` schema.
# NOTE(review): the credentials below are hardcoded; consider reading them
# from the environment before sharing this notebook.
try:
    import mysql.connector as sql

    # All connection parameters gathered in one mapping
    db_settings = dict(
        host='localhost',
        user='root',
        password='cap4770',
        database='cap4770',
        use_pure=True,
    )
    conn = sql.connect(**db_settings)

    cursor = conn.cursor(buffered=True)
    cursor.execute("use cap4770")
    print("Connected to database established.")

    # Report which tables exist in the selected schema
    cursor.execute("show tables")
    tables = cursor.fetchall()
    print("Available tables:")
    for table in tables:
        print("- {}".format(table[0]))

except Exception as e:
    # Any failure (missing driver, refused connection, bad statement) is only
    # reported, so `conn` / `cursor` may be left undefined for later cells.
    print(f"Database connection error: {e}")

In [None]:
# Load the storm events CSV and report how many records it holds
storm_df = pd.read_csv('StormEvents_details-ftp_v1.0_d2011_c20250520.csv')
n_rows, n_cols = storm_df.shape
csv_record_count = n_rows
print(f"Total records in CSV file: {csv_record_count:,}")
print(f"CSV file shape: {storm_df.shape}")
print(f"Columns: {n_cols}, Rows: {n_rows}")

# Compare with the database row count, but only when a table name is known.
# NOTE(review): `storm_table_name` is not assigned in this cell; it has to
# come from an earlier cell, otherwise the comparison is skipped.
if not locals().get('storm_table_name'):
    print("\nDatabase connection not available for comparison")
else:
    try:
        cursor.execute(f"SELECT COUNT(*) FROM {storm_table_name}")
        db_record_count = cursor.fetchone()[0]
        print(f"\nDatabase table '{storm_table_name}' records: {db_record_count:,}")
        print(f"Records match: {csv_record_count == db_record_count}")
    except Exception as e:
        # Query problems are reported rather than raised
        print(f"Error getting database count: {e}")

In [None]:
# Install SQLAlchemy for efficient database uploads
!pip install sqlalchemy

In [None]:
# Upload all CSV records to the database.
#
# Primary path: pandas `to_sql` through a SQLAlchemy engine, replacing the
# `StormEvents_details` table. Fallback path: chunked `executemany` inserts
# over the mysql.connector `conn` / `cursor` opened in the connection cell.
import sqlalchemy
from sqlalchemy import create_engine

# Read the full CSV file up front, outside the try block: the fallback path
# also needs `storm_df_full`, so a failed read must not be reported as a
# database error or leave the fallback working on a stale/undefined frame.
print("Reading full CSV file...")
storm_df_full = pd.read_csv('StormEvents_details-ftp_v1.0_d2011_c20250520.csv')
print(f"CSV records loaded: {len(storm_df_full):,}")

try:
    # Earlier SELECTs on `conn` leave an implicit transaction open (autocommit
    # is off by default in mysql.connector). That transaction keeps a metadata
    # lock on the table, which would make the DROP TABLE issued by
    # if_exists='replace' wait. End it before uploading.
    try:
        conn.commit()
    except Exception as commit_err:
        print(f"Could not reset existing connection before upload: {commit_err}")

    # Create SQLAlchemy engine for better pandas integration
    engine = create_engine("mysql+mysqlconnector://root:cap4770@localhost/cap4770")

    # Option 1: Replace the entire table with all CSV data
    print("\nUploading all records to database...")
    print("This may take a few minutes...")

    # Upload to database (replace existing table)
    storm_df_full.to_sql('StormEvents_details', con=engine, if_exists='replace', index=False, chunksize=1000)

except Exception as e:
    print(f"Error during upload: {e}")
    print("You may need to install sqlalchemy: pip install sqlalchemy")

    # Alternative approach using direct MySQL connector (slower but doesn't require SQLAlchemy)
    print("\nTrying alternative upload method...")
    try:
        # Clear existing table first
        cursor.execute("DELETE FROM StormEvents_details")
        conn.commit()

        # Prepare insert statement; column names are backtick-quoted so that
        # names clashing with SQL keywords do not break the statement.
        columns = storm_df_full.columns.tolist()
        placeholders = ', '.join(['%s'] * len(columns))
        quoted_columns = ', '.join(f"`{col}`" for col in columns)
        insert_query = f"INSERT INTO StormEvents_details ({quoted_columns}) VALUES ({placeholders})"

        # Casting to object yields plain Python scalars instead of numpy
        # scalars, which the MySQL connector cannot convert.
        rows_as_objects = storm_df_full.astype(object)

        # Insert data in chunks
        chunk_size = 1000
        total_rows = len(storm_df_full)

        for i in range(0, total_rows, chunk_size):
            chunk = rows_as_objects.iloc[i:i+chunk_size]
            # Missing values (NaN) must be sent as None so they become SQL NULL
            data_tuples = [
                tuple(None if pd.isna(value) else value for value in row)
                for row in chunk.itertuples(index=False, name=None)
            ]
            cursor.executemany(insert_query, data_tuples)
            conn.commit()
            print(f"Uploaded {min(i+chunk_size, total_rows)}/{total_rows} records...")

        print("Alternative upload completed!")

        # Verify
        cursor.execute("SELECT COUNT(*) FROM StormEvents_details")
        new_db_count = cursor.fetchone()[0]
        conn.commit()  # end the read transaction so it holds no table lock
        print(f"Final database record count: {new_db_count:,}")

    except Exception as e2:
        print(f"Alternative upload also failed: {e2}")
        print("Manual database upload may be required.")

else:
    print("Upload completed!")

    # Verify the upload. This runs outside the upload try block so that a
    # verification problem cannot trigger the destructive fallback above.
    try:
        cursor.execute("SELECT COUNT(*) FROM StormEvents_details")
        new_db_count = cursor.fetchone()[0]
        conn.commit()  # end the read transaction so it holds no table lock
        print(f"New database record count: {new_db_count:,}")
        print(f"Upload successful: {new_db_count == len(storm_df_full)}")
    except Exception as verify_err:
        print(f"Upload finished, but verification failed: {verify_err}")

In [None]:
# Verify that all records are now in the database
print("=== UPLOAD VERIFICATION ===")

# Start from a fresh transaction so the counts below reflect the latest
# committed data rather than a snapshot taken by an earlier read on `conn`.
conn.commit()

# Get current counts
csv_count = len(pd.read_csv('StormEvents_details-ftp_v1.0_d2011_c20250520.csv'))
cursor.execute("SELECT COUNT(*) FROM StormEvents_details")
db_count = cursor.fetchone()[0]

print(f"CSV file records: {csv_count:,}")
print(f"Database records: {db_count:,}")
print(f"Records match: {csv_count == db_count}")

if csv_count == db_count:
    print("\n✅ SUCCESS: All CSV records have been uploaded to the database!")
    # Report the measured count instead of a hardcoded figure
    print(f"Your damage analysis will now use the complete dataset of {csv_count:,} records.")
else:
    print("\n❌ WARNING: Record count mismatch!")
    print(f"Missing records: {csv_count - db_count:,}")

# Show some basic stats about the complete database
print("\n=== DATABASE STATISTICS ===")
cursor.execute("SELECT COUNT(DISTINCT event_type) FROM StormEvents_details")
event_types = cursor.fetchone()[0]
print(f"Total unique event types: {event_types}")

# NOTE(review): if BEGIN_DATE_TIME was uploaded as a text column, MIN/MAX
# compare strings rather than timestamps, so this range may not be
# chronological — confirm the column type.
cursor.execute("SELECT MIN(BEGIN_DATE_TIME), MAX(BEGIN_DATE_TIME) FROM StormEvents_details")
date_range = cursor.fetchone()
print(f"Date range: {date_range[0]} to {date_range[1]}")

# End the read transaction so this connection does not keep a metadata lock
# on the table (which would block a later table replacement).
conn.commit()