# In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Set page configuration: browser-tab title and icon, wide layout, and the
# sidebar opened on first load (it hosts the file uploader and the filters).
st.set_page_config(
    page_title="FAWN Climate Data Dashboard",
    # Was a mis-decoded byte sequence; restored to the intended emoji.
    page_icon="🌤️",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for better styling.
# The stylesheet is kept in a named constant so the injection call stays short;
# it pads the main container, gives metric widgets a card look, and colours
# the h1/h2 headings.
_CUSTOM_CSS = """
    <style>
    .main {
        padding: 0rem 1rem;
    }
    .stMetric {
        background-color: #f0f2f6;
        padding: 15px;
        border-radius: 10px;
    }
    h1 {
        color: #1f77b4;
        padding-bottom: 20px;
    }
    h2 {
        color: #2ca02c;
        padding-top: 20px;
    }
    </style>
    """

# unsafe_allow_html is required, otherwise the <style> tag is not passed through as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Title and description (title emoji restored from mis-decoded bytes)
st.title("🌤️ FAWN Climate Data Analysis Dashboard")
st.markdown("**Florida Automated Weather Network (FAWN) - Exploratory Data Analysis**")
st.markdown("---")

# Sidebar for file upload and filters
st.sidebar.header(" FAWN_clean dataset ")

# File uploader: restricted to CSV; the result is None until a file is
# provided, which is what the branch below checks.
uploaded_file = st.sidebar.file_uploader("Upload your cleaned FAWN data (CSV)", type=['csv'])

if uploaded_file is not None:
    # Load data (cached by Streamlit so reruns triggered by widget changes
    # do not re-parse the CSV)
    @st.cache_data
    def load_data(file):
        """Read the uploaded CSV into a DataFrame.

        If a 'Period' column is present it is parsed to datetime; values that
        cannot be parsed become NaT (errors='coerce') instead of raising.
        """
        frame = pd.read_csv(file)
        if 'Period' not in frame.columns:
            return frame
        frame['Period'] = pd.to_datetime(frame['Period'], errors='coerce')
        return frame
    
    FAWN_clean = load_data(uploaded_file)

    # Display basic info
    st.sidebar.success(f"✅ Data loaded: {len(FAWN_clean)} rows")

    # Find column names dynamically: each variable is located by a
    # case-insensitive substring of its column name, and is None when absent
    # so the tabs can skip the analyses that need it.
    def _first_matching_column(fragment):
        """Return the first column whose lowercased name contains *fragment*, else None."""
        return next((col for col in FAWN_clean.columns if fragment in col.lower()), None)

    # 'baramet' is the spelling the original lookup used and is tried first;
    # the correctly spelled 'baromet' is only a fallback.
    barometric_col = _first_matching_column('baramet') or _first_matching_column('baromet')
    humidity_col = _first_matching_column('relhum')
    solar_col = _first_matching_column('solrad')
    rain_col = _first_matching_column('rain_tot')
    
    # Sidebar filters
    st.sidebar.markdown("### 🔍 Filter Options")

    # Station filter: 'All' keeps every row, otherwise keep the chosen station.
    if 'FAWN Station' in FAWN_clean.columns:
        # Drop missing names before sorting: a mix of str and NaN (float)
        # makes sorted() raise TypeError.
        station_names = FAWN_clean['FAWN Station'].dropna().unique().tolist()
        stations = ['All'] + sorted(station_names)
        selected_station = st.sidebar.selectbox("Select FAWN Station", stations)

        if selected_station != 'All':
            FAWN_filtered = FAWN_clean[FAWN_clean['FAWN Station'] == selected_station].copy()
        else:
            FAWN_filtered = FAWN_clean.copy()
    else:
        FAWN_filtered = FAWN_clean.copy()
        selected_station = 'All'

    # Month filter: applied on top of the station filter. An empty selection
    # or any selection containing 'All' leaves the data unfiltered.
    if 'Month' in FAWN_clean.columns:
        months = ['All'] + sorted(FAWN_clean['Month'].dropna().unique().tolist())
        selected_months = st.sidebar.multiselect("Select Month(s)", months, default=['All'])

        if selected_months and 'All' not in selected_months:
            FAWN_filtered = FAWN_filtered[FAWN_filtered['Month'].isin(selected_months)]
    
    # Create tabs for different analyses (label emoji restored from
    # mis-decoded bytes)
    tab1, tab2, tab3, tab4, tab5 = st.tabs([
        "📊 Overview",
        "🌡️ Temperature Analysis",
        "🌧️ Rainfall Analysis",
        "📈 Correlations",
        "📉 Time Series"
    ])
    
    # TAB 1: OVERVIEW
    with tab1:
        st.header("📊 Dataset Overview")

        # Headline metrics; each one is shown only when its source column exists.
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.metric("Total Records", f"{len(FAWN_filtered):,}")
        with col2:
            if 'FAWN Station' in FAWN_filtered.columns:
                st.metric("Stations", FAWN_filtered['FAWN Station'].nunique())
        with col3:
            if 'Month' in FAWN_filtered.columns:
                st.metric("Months", FAWN_filtered['Month'].nunique())
        with col4:
            if 'T_min (F)' in FAWN_filtered.columns:
                st.metric("Avg Min Temp (°F)", f"{FAWN_filtered['T_min (F)'].mean():.1f}")

        st.markdown("---")

        # Display descriptive statistics
        st.subheader("📋 Descriptive Statistics")

        # Select numeric columns for statistics; the first five are preselected
        # (slicing already copes with fewer than five columns).
        numeric_cols = FAWN_filtered.select_dtypes(include=[np.number]).columns.tolist()
        selected_cols = st.multiselect(
            "Select columns to display statistics",
            numeric_cols,
            default=numeric_cols[:5]
        )

        if selected_cols:
            st.dataframe(FAWN_filtered[selected_cols].describe(), use_container_width=True)

        st.markdown("---")

        # Data preview: first 100 rows of the filtered data
        st.subheader("🔍 Data Preview")
        st.dataframe(FAWN_filtered.head(100), use_container_width=True)
    
    # TAB 2: TEMPERATURE ANALYSIS
    with tab2:
        st.header("🌡️ Temperature Analysis")

        if 'T_min (F)' not in FAWN_filtered.columns or 'Month' not in FAWN_filtered.columns:
            st.warning("Temperature or Month columns not found in the dataset.")
        elif FAWN_filtered.empty:
            # The station and month filters can combine to select no rows;
            # without this guard the outlier percentage below is 0/0.
            st.info("No records match the current filters.")
        else:
            # Work on a tab-local copy so the helper columns added below never
            # leak into FAWN_filtered, which the later tabs read when they
            # build their numeric-column pickers.
            temp_df = FAWN_filtered.copy()

            # Calculate outliers: a reading is an outlier when it lies more
            # than 2 standard deviations from the mean of its group (month and
            # station when station data exists, otherwise month only).
            if 'FAWN Station' in temp_df.columns:
                group_keys = ["Month", "FAWN Station"]
            else:
                group_keys = ["Month"]

            grouped_tmin = temp_df.groupby(group_keys)["T_min (F)"]
            group_mean = grouped_tmin.transform("mean")
            # std is NaN for single-record groups, so those rows are never flagged.
            group_std = grouped_tmin.transform("std")

            temp_df["upper_bound"] = group_mean + 2 * group_std
            temp_df["lower_bound"] = group_mean - 2 * group_std
            temp_df["Temp_min_outlier"] = (
                (temp_df["T_min (F)"] > temp_df["upper_bound"]) |
                (temp_df["T_min (F)"] < temp_df["lower_bound"])
            )

            # Outlier metrics
            total_outliers = int(temp_df["Temp_min_outlier"].sum())
            outlier_pct = (total_outliers / len(temp_df)) * 100
            avg_temp = temp_df["T_min (F)"].mean()

            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Outliers", f"{total_outliers:,}")
            with col2:
                st.metric("Outlier Percentage", f"{outlier_pct:.2f}%")
            with col3:
                st.metric("Average T_min", f"{avg_temp:.1f}°F")

            st.markdown("---")

            # Temperature distribution by month
            st.subheader("📊 Temperature Distribution by Month")

            month_names = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
                          7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}

            # Missing month values cannot be plotted or selected, so skip them.
            months_present = sorted(temp_df['Month'].dropna().unique())

            # Box plot: one box per month, with mean and standard deviation drawn
            fig = go.Figure()

            for month in months_present:
                fig.add_trace(go.Box(
                    y=temp_df.loc[temp_df['Month'] == month, 'T_min (F)'],
                    name=month_names.get(month, str(month)),
                    boxmean='sd'
                ))

            fig.update_layout(
                title="Minimum Temperature Distribution by Month",
                yaxis_title="Temperature (°F)",
                xaxis_title="Month",
                height=500,
                showlegend=False
            )

            st.plotly_chart(fig, use_container_width=True)

            st.markdown("---")

            # Histogram with outliers
            st.subheader("📈 Temperature Histograms with Outliers")

            selected_month = st.selectbox(
                "Select month to visualize",
                months_present,
                format_func=lambda x: month_names.get(x, str(x))
            )

            month_data = temp_df[temp_df['Month'] == selected_month]
            is_outlier = month_data['Temp_min_outlier']
            outliers = month_data.loc[is_outlier, 'T_min (F)']
            non_outliers = month_data.loc[~is_outlier, 'T_min (F)']

            fig = go.Figure()

            # Non-outliers histogram
            fig.add_trace(go.Histogram(
                x=non_outliers,
                name='Non-outliers',
                opacity=0.6,
                marker_color='blue',
                nbinsx=30
            ))

            # Outliers histogram (only drawn when the month has any)
            if len(outliers) > 0:
                fig.add_trace(go.Histogram(
                    x=outliers,
                    name='Outliers',
                    opacity=0.9,
                    marker_color='red',
                    nbinsx=30
                ))

            fig.update_layout(
                title=f"Temperature Distribution - {month_names.get(selected_month, str(selected_month))}",
                xaxis_title="Temperature (°F)",
                yaxis_title="Frequency",
                barmode='overlay',
                height=500
            )

            st.plotly_chart(fig, use_container_width=True)
    
    # TAB 3: RAINFALL ANALYSIS
    with tab3:
        st.header("🌧️ Rainfall Analysis")

        if rain_col and 'Month' in FAWN_filtered.columns:

            month_names = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
                          7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}

            def _add_rain_bars(figure, summary, rainy_name, dry_name):
                """Append one rainy and one non-rainy bar trace for *summary* to *figure*."""
                month_labels = [month_names.get(m, str(m)) for m in summary['Month']]
                figure.add_trace(go.Bar(
                    x=month_labels,
                    y=summary['Rainy Days'],
                    name=rainy_name,
                    marker_color='steelblue'
                ))
                figure.add_trace(go.Bar(
                    x=month_labels,
                    y=summary['Non-Rainy Days'],
                    name=dry_name,
                    marker_color='skyblue'
                ))

            # Rainfall metrics
            col1, col2, col3 = st.columns(3)
            with col1:
                total_rain = FAWN_filtered[rain_col].sum()
                st.metric("Total Rainfall (in)", f"{total_rain:.2f}")
            with col2:
                avg_rain = FAWN_filtered[rain_col].mean()
                st.metric("Average Rainfall (in)", f"{avg_rain:.4f}")
            with col3:
                # Counts records with rainfall above zero.
                rainy_days = (FAWN_filtered[rain_col] > 0).sum()
                st.metric("Rainy Days", f"{rainy_days:,}")

            st.markdown("---")

            # Rainy vs Non-rainy days
            st.subheader("☔ Rainy vs Non-Rainy Days by Month")

            # Count per month, and per station when station data exists.
            has_station = 'FAWN Station' in FAWN_filtered.columns
            group_keys = ['Month', 'FAWN Station'] if has_station else ['Month']

            zero_rain = (
                FAWN_filtered[FAWN_filtered[rain_col] == 0]
                .groupby(group_keys).size().reset_index(name='Non-Rainy Days')
            )
            rainy = (
                FAWN_filtered[FAWN_filtered[rain_col] > 0]
                .groupby(group_keys).size().reset_index(name='Rainy Days')
            )

            # Outer merge keeps groups that appear in only one of the two
            # counts; the missing side is a count of zero.
            rain_summary = zero_rain.merge(rainy, on=group_keys, how='outer').fillna(0)

            # Bar chart
            fig = go.Figure()

            if has_station:
                for station in rain_summary['FAWN Station'].unique():
                    station_data = rain_summary[rain_summary['FAWN Station'] == station]
                    _add_rain_bars(fig, station_data, f'{station} - Rainy', f'{station} - Non-Rainy')
            else:
                _add_rain_bars(fig, rain_summary, 'Rainy Days', 'Non-Rainy Days')

            fig.update_layout(
                title="Rainy vs Non-Rainy Days by Month",
                xaxis_title="Month",
                yaxis_title="Number of Days",
                barmode='group',
                height=500
            )

            st.plotly_chart(fig, use_container_width=True)

            st.markdown("---")

            # Monthly rainfall totals (summed over every row in the selection)
            st.subheader("📊 Monthly Rainfall Totals")

            monthly_rain = FAWN_filtered.groupby('Month')[rain_col].sum().reset_index()

            fig = go.Figure()
            fig.add_trace(go.Bar(
                x=[month_names.get(m, str(m)) for m in monthly_rain['Month']],
                y=monthly_rain[rain_col],
                marker_color='darkblue',
                text=monthly_rain[rain_col].round(2),
                textposition='auto'
            ))

            fig.update_layout(
                title="Total Rainfall by Month",
                xaxis_title="Month",
                yaxis_title="Total Rainfall (in)",
                height=500
            )

            st.plotly_chart(fig, use_container_width=True)

        else:
            st.warning("Rainfall or Month columns not found in the dataset.")
    
    # TAB 4: CORRELATIONS
    with tab4:
        st.header("📈 Correlation Analysis")

        # Barometric Pressure vs Humidity
        if barometric_col and humidity_col:
            st.subheader("🌡️ Barometric Pressure vs Relative Humidity")

            fig = px.scatter(
                FAWN_filtered,
                x=barometric_col,
                y=humidity_col,
                color='FAWN Station' if 'FAWN Station' in FAWN_filtered.columns else None,
                title="Barometric Pressure vs Relative Humidity",
                labels={barometric_col: "Barometric Pressure (mb)",
                       humidity_col: "Relative Humidity (%)"},
                height=600,
                opacity=0.6
            )

            st.plotly_chart(fig, use_container_width=True)

            # Calculate correlation: off-diagonal entry of the 2x2 matrix
            corr = FAWN_filtered[[barometric_col, humidity_col]].corr().iloc[0, 1]
            st.info(f"**Correlation coefficient:** {corr:.4f}")

        st.markdown("---")

        # Solar Radiation correlations
        if solar_col and 'Month' in FAWN_filtered.columns:
            st.subheader("☀️ Solar Radiation Relationships")

            def _solar_vs_monthly_figure(other_col, other_name, other_color, other_axis_title, title):
                """Build a dual-axis line chart of monthly mean solar radiation vs *other_col*.

                Solar radiation is drawn on the primary y-axis and the monthly
                mean of *other_col* on the secondary y-axis.
                """
                monthly = FAWN_filtered.groupby('Month').agg({
                    solar_col: 'mean',
                    other_col: 'mean'
                }).reset_index()

                figure = make_subplots(specs=[[{"secondary_y": True}]])

                figure.add_trace(
                    go.Scatter(x=monthly['Month'], y=monthly[solar_col],
                              name="Solar Radiation", mode='lines+markers',
                              line=dict(color='orange', width=3)),
                    secondary_y=False
                )

                figure.add_trace(
                    go.Scatter(x=monthly['Month'], y=monthly[other_col],
                              name=other_name, mode='lines+markers',
                              line=dict(color=other_color, width=3)),
                    secondary_y=True
                )

                figure.update_xaxes(title_text="Month")
                figure.update_yaxes(title_text="Solar Radiation (w/m²)", secondary_y=False)
                figure.update_yaxes(title_text=other_axis_title, secondary_y=True)
                figure.update_layout(title=title, height=400)
                return figure

            col1, col2 = st.columns(2)

            with col1:
                if rain_col:
                    # Solar vs Rainfall
                    fig = _solar_vs_monthly_figure(
                        rain_col, "Rainfall", 'blue',
                        "Rainfall (in)", "Solar Radiation vs Rainfall"
                    )
                    st.plotly_chart(fig, use_container_width=True)

            with col2:
                if 'T_max (F)' in FAWN_filtered.columns:
                    # Solar vs Temperature
                    fig = _solar_vs_monthly_figure(
                        'T_max (F)', "Max Temperature", 'red',
                        "Max Temperature (°F)", "Solar Radiation vs Max Temperature"
                    )
                    st.plotly_chart(fig, use_container_width=True)

        st.markdown("---")

        # Correlation heatmap
        st.subheader("🔥 Correlation Heatmap")

        # The first six numeric columns are preselected (slicing already copes
        # with fewer than six).
        numeric_cols = FAWN_filtered.select_dtypes(include=[np.number]).columns.tolist()
        selected_corr_cols = st.multiselect(
            "Select variables for correlation analysis",
            numeric_cols,
            default=numeric_cols[:6]
        )

        # A correlation matrix needs at least two variables.
        if len(selected_corr_cols) >= 2:
            corr_matrix = FAWN_filtered[selected_corr_cols].corr()

            fig = px.imshow(
                corr_matrix,
                labels=dict(color="Correlation"),
                x=corr_matrix.columns,
                y=corr_matrix.columns,
                color_continuous_scale='RdBu_r',
                zmin=-1,
                zmax=1,
                text_auto='.2f',
                aspect='auto',
                height=600
            )

            fig.update_layout(title="Correlation Heatmap")
            st.plotly_chart(fig, use_container_width=True)
    
    # TAB 5: TIME SERIES
    with tab5:
        st.header("📉 Time Series Analysis")

        if 'Period' in FAWN_filtered.columns:
            st.subheader("⏰ Time Series Visualization")

            # Variable selection
            numeric_cols = FAWN_filtered.select_dtypes(include=[np.number]).columns.tolist()
            selected_var = st.selectbox("Select variable to plot over time", numeric_cols)

            if selected_var:
                has_station = 'FAWN Station' in FAWN_filtered.columns

                # Line chart: one line per station when station data exists
                fig = px.line(
                    FAWN_filtered.sort_values('Period'),
                    x='Period',
                    y=selected_var,
                    color='FAWN Station' if has_station else None,
                    title=(f"{selected_var} Over Time by Station" if has_station
                           else f"{selected_var} Over Time"),
                    height=600
                )

                st.plotly_chart(fig, use_container_width=True)

                st.markdown("---")

                # Rolling average
                st.subheader("📊 Rolling Average")
                window_size = st.slider("Select window size (days)", 1, 30, 7)

                # Collapse to one value per timestamp (mean across stations).
                # Rolling over the raw rows would mix interleaved stations and
                # make an "N-day" window cover only a fraction of N days.
                period_series = (
                    FAWN_filtered.dropna(subset=['Period'])
                    .groupby('Period')[selected_var]
                    .mean()
                    .sort_index()
                )

                if period_series.empty:
                    st.info("No dated records available for a rolling average.")
                else:
                    if isinstance(period_series.index, pd.DatetimeIndex):
                        # Time-based window: spans N calendar days however many
                        # records fall in them. Offset windows use
                        # min_periods=1, so the curve starts at the first record.
                        rolling_series = period_series.rolling(f'{window_size}D').mean()
                    else:
                        # 'Period' was not parsed as datetime: fall back to a
                        # row-count window over the per-timestamp means.
                        rolling_series = period_series.rolling(window=window_size).mean()

                    multi_station = has_station and FAWN_filtered['FAWN Station'].nunique() > 1

                    fig = go.Figure()

                    fig.add_trace(go.Scatter(
                        x=period_series.index,
                        y=period_series.values,
                        name='Mean across stations' if multi_station else 'Original',
                        opacity=0.3,
                        line=dict(color='lightblue')
                    ))

                    fig.add_trace(go.Scatter(
                        x=rolling_series.index,
                        y=rolling_series.values,
                        name=f'{window_size}-day Rolling Average',
                        line=dict(color='darkblue', width=2)
                    ))

                    fig.update_layout(
                        title=f"{selected_var} with {window_size}-day Rolling Average",
                        xaxis_title="Date",
                        yaxis_title=selected_var,
                        height=600
                    )

                    st.plotly_chart(fig, use_container_width=True)
        else:
            st.warning("Period/Date column not found in the dataset.")
    
    # Footer: a horizontal rule followed by a one-line credit. It sits inside
    # the "file uploaded" branch, so it is rendered only when data is loaded.
    st.markdown("---")
    st.markdown("**Dashboard created for FAWN Climate Data Analysis** | Powered by Streamlit")

else:
    # Welcome screen when no file is uploaded: a prompt pointing at the
    # sidebar uploader, then usage instructions and a feature list.
    st.info("👈 Please upload your cleaned FAWN data CSV file from the sidebar to begin analysis.")

    st.markdown("""
    ### 📋 Instructions:
    1. Upload your cleaned FAWN dataset (CSV format)
    2. Use the sidebar to filter by station and month
    3. Explore different tabs for various analyses:
        - **Overview**: Dataset summary and statistics
        - **Temperature Analysis**: Temperature distributions and outliers
        - **Rainfall Analysis**: Rainy vs non-rainy days patterns
        - **Correlations**: Relationships between variables
        - **Time Series**: Temporal patterns and trends

    ### 📊 Features:
    - Interactive visualizations with Plotly
    - Dynamic filtering by station and month
    - Outlier detection for temperature data
    - Correlation analysis
    - Time series analysis with rolling averages
    """)