In [3]:
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns

# Map each site's key (shown in the country selector) to its cleaned CSV file.
cleaned_file_paths = {
    'benin-malanville': 'cleaned_benin-malanville_data.csv',
    'sierraleone-bumbuna': 'cleaned_sierraleone-bumbuna_data.csv',
    'togo-dapaong_qc': 'cleaned_togo-dapaong_qc_data.csv',
}


@st.cache_data
def _load_cleaned_csv(path):
    """Read one cleaned CSV file into a DataFrame.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached ``pd.read_csv`` would re-parse every file on each rerun. With
    ``st.cache_data`` the file is parsed once per path and callers receive a
    copy of the cached frame, so editing the returned frame does not alter
    the cached value.

    NOTE: the cache is keyed on ``path`` only; if a CSV is regenerated while
    the app is running, clear the cache (or restart) to pick up the new data.
    """
    return pd.read_csv(path)


# Load cleaned datasets, keyed by the same names as cleaned_file_paths.
dataframes = {
    country: _load_cleaned_csv(path)
    for country, path in cleaned_file_paths.items()
}

# Page title
st.title('Environmental Data Dashboard')

# Sidebar selector: one entry per loaded dataset
available_countries = list(dataframes)
country = st.sidebar.selectbox('Select Country', options=available_countries)

# DataFrame for the country picked in the sidebar
df = dataframes[country]

# Overview section: first rows, then summary statistics
st.header(f'Data Overview for {country}')
preview_rows = df.head()
st.write(preview_rows)
summary_stats = df.describe()
st.write(summary_stats)

# Plot Time Series Data
st.header('Time Series Plots')

# Series drawn against the time axis, in legend order.
time_series_columns = ['GHI', 'DNI', 'DHI', 'Tamb']

if 'Timestamp' in df.columns:
    # Parse the timestamps and use them as the index. A new frame is built
    # instead of editing in place, so the object stored in `dataframes` is
    # not modified as a side effect of plotting.
    df = df.assign(Timestamp=pd.to_datetime(df['Timestamp'])).set_index('Timestamp')

    # Plot only the series this dataset actually contains, rather than
    # raising KeyError when one of them is absent.
    present_columns = [col for col in time_series_columns if col in df.columns]
    missing_columns = [col for col in time_series_columns if col not in df.columns]

    if present_columns:
        st.subheader('GHI, DNI, DHI, and Tamb Over Time')
        if missing_columns:
            st.write(f"Columns not found and skipped: {', '.join(missing_columns)}")

        fig, ax = plt.subplots(figsize=(14, 8))
        for col in present_columns:
            ax.plot(df.index, df[col], label=col)
        ax.set_title('Time Series of GHI, DNI, DHI, and Tamb')
        ax.set_xlabel('Time')
        ax.set_ylabel('Values')
        ax.legend()
        st.pyplot(fig)
        # pyplot keeps a reference to every figure until it is closed; the
        # script reruns on each interaction, so close it to avoid piling up
        # open figures.
        plt.close(fig)
    else:
        st.write("None of the GHI, DNI, DHI, or Tamb columns were found for time series plotting.")
else:
    st.write("No 'Timestamp' column found for time series plotting.")

# Plot Correlation Matrix
st.header('Correlation Matrix')

# Keep numeric columns only, then discard rows and columns that are entirely
# NaN: an all-NaN column would only add a blank row/column to the heatmap.
# dropna returns a new frame here; calling it with inplace=True on the result
# of select_dtypes can emit SettingWithCopyWarning on some pandas versions.
numeric_df = (
    df.select_dtypes(include='number')
    .dropna(axis=0, how='all')
    .dropna(axis=1, how='all')
)

if not numeric_df.empty:
    corr = numeric_df.corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    # Fix the colour scale to the full correlation range [-1, 1].
    sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f', ax=ax, vmin=-1, vmax=1)
    ax.set_title('Correlation Matrix')
    st.pyplot(fig)
    # pyplot keeps a reference to every figure until it is closed; the script
    # reruns on each interaction, so close it to avoid piling up open figures.
    plt.close(fig)
else:
    st.write("No numeric data available for correlation matrix.")

# Plot Wind Analysis
st.header('Wind Analysis')

# NOTE(review): verify these column names against the CSV headers. If the
# files use different names (e.g. abbreviations such as WS/WD), this section
# will always fall through to the "missing" message below.
if 'WindSpeed' in df.columns and 'WindDirection' in df.columns:
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(data=df, x='WindSpeed', y='WindDirection', ax=ax)
    ax.set_title('Wind Speed vs Wind Direction')
    ax.set_xlabel('Wind Speed')
    ax.set_ylabel('Wind Direction')
    st.pyplot(fig)
    # pyplot keeps a reference to every figure until it is closed; the script
    # reruns on each interaction, so close it to avoid piling up open figures.
    plt.close(fig)
else:
    st.write("Wind Speed and/or Wind Direction columns are missing for wind analysis.")

# Plot Temperature and Humidity Analysis
st.header('Temperature and Humidity Analysis')

# NOTE(review): verify the 'Humidity' column name against the CSV headers. If
# the files use a different name (e.g. an abbreviation such as RH), this
# section will always fall through to the "missing" message below.
if 'Tamb' in df.columns and 'Humidity' in df.columns:
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(data=df, x='Tamb', y='Humidity', ax=ax)
    ax.set_title('Temperature vs Humidity')
    ax.set_xlabel('Temperature')
    ax.set_ylabel('Humidity')
    st.pyplot(fig)
    # pyplot keeps a reference to every figure until it is closed; the script
    # reruns on each interaction, so close it to avoid piling up open figures.
    plt.close(fig)
else:
    st.write("Temperature and/or Humidity columns are missing for temperature-humidity analysis.")


SyntaxError: invalid syntax (507122745.py, line 1)