# MVP validation workflow for the Oregon SQM Dashboard project.

Steps:
1. Load raw data tables and display basic info
2. Perform geocoding for all sites & save results
4. Visualize geocoded sites on a Folium map
5. Show bar charts and scatter plots
6. Smoke-test Dash, Flask, and (lightweight) Streamlit app imports
7. Summarize results

## Load raw data tables and display basic info

In [None]:
# Import necessary libraries
from pathlib import Path
import pandas as pd
import sys

In [None]:
def _ensure_on_sys_path(directory):
    """Put *directory* at the front of sys.path unless it is already listed."""
    entry = str(directory)
    if entry not in sys.path:
        sys.path.insert(0, entry)


# Key project directories. The project root is taken to be the parent of the
# current working directory (one level up from 'development').
PROJECT_ROOT = Path.cwd().parent
SHARED_DIR = PROJECT_ROOT / 'shared'
RAW_DIR = PROJECT_ROOT / 'shared' / 'data' / 'raw'

# Make the project root importable.
_ensure_on_sys_path(PROJECT_ROOT)
print(f"Project root: {PROJECT_ROOT}")

# Register the shared directory as well so that the `utils` package can be
# resolved (matches streamlit_app logic). The message below is printed whether
# or not the entry was already present on sys.path.
_ensure_on_sys_path(SHARED_DIR)
print(f"Shared dir added to sys.path: {SHARED_DIR}")

# Report which raw CSV tables are available on disk.
print(f"Raw data dir: {RAW_DIR}")
for csv_path in RAW_DIR.glob('*.csv'):
    print('Found raw file:', csv_path.name)

In [None]:
from shared.utils.data_processing import OregonSQMProcessor

In [None]:
# Build the processor against <project root>/shared/data, then load the raw
# tables through it.
# NOTE(review): raw_dfs is used below like a mapping of table name -> pandas
# DataFrame; confirm against OregonSQMProcessor.load_raw_data.
process_data = OregonSQMProcessor(data_dir=PROJECT_ROOT / 'shared' / 'data')
raw_dfs = process_data.load_raw_data()

In [None]:
# Show the keys under which the loaded raw tables are stored (rendered as the
# cell's output because it is the last expression).
raw_dfs.keys()

In [None]:
# Print a schema summary (columns, dtypes, non-null counts) for each raw table.
# DataFrame.info() writes its report straight to stdout and returns None, so
# wrapping it in display() only rendered a stray "None" after every table.
# NOTE(review): assumes every value in raw_dfs is a pandas DataFrame.
for key, table in raw_dfs.items():
    print(key)
    table.info()

## Perform geocoding for all sites & save results

In [None]:
# # Geocode all sites using only Google Maps API
# import requests
# import pandas as pd

# # API_KEY = '<redacted for privacy reasons>'  # <-- Replace with your Google Maps API key

# def google_maps_geocode(address, api_key):
#     url = f'https://maps.googleapis.com/maps/api/geocode/json?address={address}&key={api_key}'
#     resp = requests.get(url)
#     if resp.status_code == 200:
#         results = resp.json().get('results', [])
#         if results:
#             loc = results[0]['geometry']['location']
#             return loc['lat'], loc['lng']
#     return None, None

# site_names = raw_dfs['sites']['Name'].dropna().tolist()

# results = []
# for i, site_name in enumerate(site_names, 1):
#     address = f"{site_name}, Oregon, USA"
#     print(f"[{i}/{len(site_names)}] Geocoding: {address}")
#     lat, lon = google_maps_geocode(address, API_KEY)
#     results.append({"site_name": site_name, "latitude": lat, "longitude": lon})


# geocode_df = pd.DataFrame(results)
# geocode_df.to_csv(PROJECT_ROOT / 'shared/data/geospatial/sites_coordinates.csv', index=False)
# geocode_df.head()

In [None]:
# geocode_df.to_csv(PROJECT_ROOT / 'shared/data/geospatial/sites_coordinates.csv', index=False)

## Visualize geocoded sites on a Folium map

In [None]:
# Load the previously saved site coordinates instead of re-running geocoding.
# NOTE(review): the commented-out geocoding cells above write this file to
# 'shared/data/geospatial/sites_coordinates.csv', while it is read here from
# 'shared/data/raw/' - confirm which location is the intended one.
geocode_df = pd.read_csv(PROJECT_ROOT / 'shared/data/raw/sites_coordinates.csv')

In [None]:
# # 4. Map visualization (requires successful geocoding)
# import folium

# fmap = folium.Map(location=[43.9,-120.6], zoom_start=6, tiles='CartoDB positron')
# for i, row in geocode_df.dropna(subset=['latitude', 'longitude']).iterrows():
#     folium.Marker(
#         location=[row['latitude'], row['longitude']],
#         popup=row['site_name'],
#         icon=folium.Icon(color='blue', icon='info-sign')
#     ).add_to(fmap)

#display(fmap)
    

## Visualization

In [None]:
# from shared.utils.visualizations import create_ranking_chart

# create_ranking_chart(
#     sites_df=raw_dfs['clear_measurements'],
#     y_col='median_brightness_mag_arcsec2'
# )

In [None]:
from shared.utils.visualizations import create_interactive_2d_plot

# Plot the 'clear_measurements' table in 2-D: median brightness on the x axis
# against the median linear-scale flux ratio on the y axis, with two extra
# columns passed as hover information. The call is the last expression in the
# cell, so whatever it returns is rendered as the cell output.
create_interactive_2d_plot(
    x_col='median_brightness_mag_arcsec2',
    y_col='median_linear_scale_flux_ratio',
    hover_cols=[
        'bortle_sky_level',
        'x_brighter_than_darkest_night_sky',
    ],
    df=raw_dfs['clear_measurements'],
)