# MVP validation workflow for the Oregon SQM Dashboard project.

Steps:
1. Load raw data tables and display basic info
2. Perform geocoding for all sites & save results
4. Visualize geocoded sites on a Folium map
5. Show bar charts and scatter plots
6. Smoke-test Dash, Flask, and (lightweight) Streamlit app imports
7. Summarize results

## Load raw data tables and display basic info

In [1]:
# Import necessary libraries
from pathlib import Path
import json
import pandas as pd
import importlib
import sys
import logging

In [None]:
# Locate the project root (one level up from 'development', the directory
# this notebook is expected to run from).
PROJECT_ROOT = Path.cwd().parent
# The shared directory is needed on sys.path so that the `utils` package can
# be resolved (matches streamlit_app logic).
SHARED_DIR = PROJECT_ROOT / 'shared'

# Prepend each location to sys.path unless it is already listed. The project
# root is handled first, so a newly added SHARED_DIR lands in front of it.
for import_root in (PROJECT_ROOT, SHARED_DIR):
    root_str = str(import_root)
    if root_str not in sys.path:
        sys.path.insert(0, root_str)

# Raw inputs and processed outputs both live under shared/data.
DATA_DIR = SHARED_DIR / 'data'
RAW_DIR = DATA_DIR / 'raw'
PROCESSED_DIR = DATA_DIR / 'processed'
# Create the output directory up front; a no-op when it already exists.
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

print(f"Project root: {PROJECT_ROOT}")
print(f"Shared dir added to sys.path: {SHARED_DIR}")
print(f"Raw data dir: {RAW_DIR}")
print(f"Processed data dir: {PROCESSED_DIR}")

# List raw CSVs (top level of RAW_DIR only, in the order glob yields them)
for csv_path in RAW_DIR.glob('*.csv'):
    print('Found raw file:', csv_path.name)

Project root: /home/vidit-agrawal/projects/darksky-oregon-dashboard
Shared dir added to sys.path: /home/vidit-agrawal/projects/darksky-oregon-dashboard/shared
Raw data dir: /home/vidit-agrawal/projects/darksky-oregon-dashboard/shared/data/raw
Processed data dir: /home/vidit-agrawal/projects/darksky-oregon-dashboard/shared/data/processed
Found raw file: cloudy_night_measurements.csv
Found raw file: sites_locations.csv
Found raw file: cloud_coverage.csv
Found raw file: clear_night_measurements.csv
Found raw file: longterm_trends.csv
Found raw file: milky_way_visibility.csv
utils package import: OK


In [3]:
from utils.data_processing import OregonSQMProcessor

In [4]:
# Build the processor against the shared data directory, then load the raw
# tables into `raw_dfs`.
data_root = PROJECT_ROOT / 'shared' / 'data'
process_data = OregonSQMProcessor(data_dir=data_root)
raw_dfs = process_data.load_raw_data()

INFO:utils.data_processing:Initializing OregonSQMProcessor with data directory: /home/vidit-agrawal/projects/darksky-oregon-dashboard/shared/data
INFO:utils.data_processing:Loaded sites: 63 records
INFO:utils.data_processing:Loaded clear_measurements: 58 records
INFO:utils.data_processing:Loaded cloudy_measurements: 60 records
INFO:utils.data_processing:Loaded trends: 32 records
INFO:utils.data_processing:Loaded milky_way: 57 records
INFO:utils.data_processing:Loaded cloud_coverage: 60 records
INFO:utils.data_processing:Loaded sites: 63 records
INFO:utils.data_processing:Loaded clear_measurements: 58 records
INFO:utils.data_processing:Loaded cloudy_measurements: 60 records
INFO:utils.data_processing:Loaded trends: 32 records
INFO:utils.data_processing:Loaded milky_way: 57 records
INFO:utils.data_processing:Loaded cloud_coverage: 60 records


In [5]:
# Show the names of the raw tables returned by load_raw_data().
raw_dfs.keys()

dict_keys(['sites', 'clear_measurements', 'cloudy_measurements', 'trends', 'milky_way', 'cloud_coverage'])

In [6]:
# Inspect the raw 'sites' table: column names, dtypes, and non-null counts.
raw_dfs['sites'].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Name            63 non-null     object
 1   Install Number  63 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 1.1+ KB


In [7]:
# (Scratch cell intentionally left empty. It previously held a stray `--`,
# which is not valid Python and aborted a top-to-bottom run with a SyntaxError.)

## Perform geocoding for all sites & save results

## Visualize geocoded sites on a Folium map

In [None]:
# 4. Map visualization (requires successful geocoding)
import folium

# Keep only the sites whose geocoding result is truthy (i.e. drop failed lookups).
# NOTE(review): `geocode_results` must already exist from the geocoding step;
# each kept value is unpacked below as a (lat, lon) pair.
coords = {site: latlon for site, latlon in geocode_results.items() if latlon}

if not coords:
    print('No coordinates to map yet.')
else:
    # Center roughly on Oregon
    fmap = folium.Map(
        location=[43.9, -120.6],
        zoom_start=6,
        tiles='CartoDB positron',
    )
    # One marker per geocoded site; the popup shows the site name.
    for name, (lat, lon) in coords.items():
        marker = folium.Marker([lat, lon], popup=name)
        marker.add_to(fmap)
    display(fmap)