In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


In [None]:



# Load the raw 2020 exports: creek discharge, creek gage height, and daily weather.
discharge_data = pd.read_csv('discharge_2020.csv')
gage_height_data = pd.read_csv('gage_height_2020.csv')
weather_data = pd.read_csv('weather_2020.csv')

# The first row of each creek file holds non-numeric content rather than a
# measurement, so skip it. .copy() yields independent frames so the column
# assignments below do not write into a slice of the raw data.
discharge_data_clean = discharge_data.iloc[1:].copy()
gage_height_data_clean = gage_height_data.iloc[1:].copy()

# Parse timestamps: the creek series are joined on the exact datetime, the
# weather table on the calendar day.
discharge_data_clean['datetime'] = pd.to_datetime(discharge_data_clean['datetime'], format='%Y/%m/%d %H:%M')
gage_height_data_clean['datetime'] = pd.to_datetime(gage_height_data_clean['datetime'], format='%Y/%m/%d %H:%M')
weather_data['Date'] = pd.to_datetime(weather_data['Date'], format='%m/%d/%Y')

# Inner join (the pandas default): only timestamps present in BOTH creek files
# survive. Column names shared by the two files receive the given suffixes.
merged_data = pd.merge(discharge_data_clean, gage_height_data_clean, on='datetime', suffixes=('_discharge', '_gage'))

# Derive a calendar-day key on both sides so every creek reading picks up the
# weather row of its day.
merged_data['date_only'] = merged_data['datetime'].dt.date
weather_data['date_only'] = weather_data['Date'].dt.date

# Attach the weather columns (inner join again: days without weather data are dropped).
final_data = pd.merge(merged_data, weather_data, on='date_only')

# Bookkeeping / qualifier columns from the creek files and weather fields that
# the analysis does not use.
columns_to_drop = [
    'agency_cd_discharge', 'site_no_discharge', 'tz_cd_discharge', '110247_00060_cd', 'Unnamed: 6_discharge',
    'agency_cd_gage', 'site_no_gage', 'tz_cd_gage', '110246_00065_cd', 'Unnamed: 6_gage',
    'Departure', 'HDD', 'CDD', 'New Snow', 'Snow Depth'
]

# Single drop producing the analysis frame.
# NOTE(review): 'Date' and 'date_only' are deliberately left in the result so
# that cells relying on them keep working; add them to columns_to_drop if they
# are not needed anywhere.
final_data_cleaned = final_data.drop(columns=columns_to_drop)

# The creek files carried a non-numeric first row, so pandas has most likely
# read the measurement columns as text. Convert them to numbers so that
# max()/mean() are numeric instead of string comparisons; values that cannot
# be parsed become NaN (and are skipped by pandas aggregations).
measurement_columns = ['110247_00060', '110246_00065']
final_data_cleaned[measurement_columns] = final_data_cleaned[measurement_columns].apply(
    pd.to_numeric, errors='coerce'
)

# Preview the cleaned dataframe
print(final_data_cleaned.head())






# 1. Flood Significance Analysis Using Creek Data

In [None]:


# Stream the CSV in fixed-size chunks and accumulate max / sum / count, so the
# statistics can be computed without loading the whole file into memory.

# Rows per chunk; adjust to the memory available on your system.
chunk_size = 10000

# Running aggregates. The maxima start at -inf (not 0) so that data consisting
# only of negative values still yields the correct maximum.
max_gage_height = float('-inf')
sum_gage_height = 0.0
gage_height_count = 0  # number of valid (non-NaN) gage height readings

max_discharge = float('-inf')
sum_discharge = 0.0
discharge_count = 0  # number of valid (non-NaN) discharge readings

row_count = 0  # total rows read, valid or not

# NOTE(review): '/path/to/your_data.csv' is a placeholder -- point it at the
# real file (it must contain both measurement columns) before running.
for chunk in pd.read_csv('/path/to/your_data.csv', chunksize=chunk_size):
    # Coerce to numbers: text or otherwise unparseable entries become NaN
    # instead of breaking max()/sum() below.
    gage_height = pd.to_numeric(chunk['110246_00065'], errors='coerce')
    discharge = pd.to_numeric(chunk['110247_00060'], errors='coerce')

    # Series.max() returns NaN for a chunk without valid values; np.fmax
    # ignores NaN, so such a chunk cannot disturb the running maximum.
    max_gage_height = np.fmax(max_gage_height, gage_height.max())
    max_discharge = np.fmax(max_discharge, discharge.max())

    # Series.sum() skips NaN ...
    sum_gage_height += gage_height.sum()
    sum_discharge += discharge.sum()

    # ... so the divisor of each mean must be the number of valid values of
    # that column, not the number of rows.
    gage_height_count += gage_height.count()
    discharge_count += discharge.count()
    row_count += chunk.shape[0]

# Finalise the statistics; report NaN when a column had no valid values
# instead of dividing by zero or printing the -inf seed.
if gage_height_count:
    mean_gage_height = sum_gage_height / gage_height_count
else:
    max_gage_height = mean_gage_height = float('nan')

if discharge_count:
    mean_discharge = sum_discharge / discharge_count
else:
    max_discharge = mean_discharge = float('nan')

print(f"Max Gage Height: {max_gage_height}")
print(f"Average Gage Height: {mean_gage_height}")
print(f"Max Discharge: {max_discharge}")
print(f"Average Discharge: {mean_discharge}")


In [None]:
# Compute the max and mean of gage height and discharge on the in-memory
# frame, and measure how long the four aggregations take.

import time

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
start_time = time.perf_counter()

max_gage_height = final_data_cleaned['110246_00065'].max()
avg_gage_height = final_data_cleaned['110246_00065'].mean()

max_discharge = final_data_cleaned['110247_00060'].max()
avg_discharge = final_data_cleaned['110247_00060'].mean()

end_time = time.perf_counter()

print(f"Time taken for max and mean calculations: {end_time - start_time} seconds")


In [None]:
# Summarise the gage height column of the cleaned frame: highest and mean reading.
gage_height_series = final_data_cleaned['110246_00065']  # select the column once, reuse below

max_gage_height = gage_height_series.max()
avg_gage_height = gage_height_series.mean()

# Report both statistics.
print(f"Maximum Gage Height: {max_gage_height} feet")
print(f"Average Gage Height: {avg_gage_height} feet")
