In [None]:
import pandas as pd
import plotly.express as px
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
import sklearn
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

<h1 style="text-align: center;">Welcome to the Clean Water Team!</h1>
<h2 style="text-align: center;">Now a Brief Introduction to the Problem That You Will Help Solve</h2>

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Soundtrack player: loops once started, never starts by itself, and is kept narrow.
audio = widgets.Audio.from_file("loop_1min_snes.wav")
audio.autoplay = False  # browsers usually block autoplay until first click
audio.loop = True
audio.layout = widgets.Layout(width="200px")

# Horizontal container holding an empty zero-width label followed by the player.
box = widgets.HBox(
    children=[
        widgets.Label("", layout=widgets.Layout(width="0px")),
        audio,
    ]
)
display(box)


Let's take a step back in time and go back to the 1950s for the Las Vegas Valley (also known as Clark County):
<ul> 
<li>Las Vegas and the surrounding communities have experienced rapid growth over the past few years as well as domestic and international attention from tourism from the rise of the Las Vegas Strip and Fremont Street.</li>
<li>This is great news in that the community is expanding, but at this point in time there is no centralized wastewater treatment for the area.</li>
<li>There were many decentralized septic tanks and even private wastewater treatment plants on the Las Vegas Strip (Yes! This is true, believe it or not!) in the 1950s.</li>
<li>As centralized wastewater treatment is cheaper in the long run as a city grows, the Clark County Water Reclamation District was created as a General Improvement District on August 11, 1954, with the mission to treat wastewater flows from the Las Vegas Valley's unincorporated areas, which include the Las Vegas Strip.</li>
<li>In early 1955, Clark County residents voted for the issuance of bonds for the construction of a collection system (pipelines) and wastewater treatment facility.</li>
</ul>
This is where you come in: the residents of Clark County are depending on you to plan, design, and operate a wastewater collection system and treatment facility!

<h2>Planning</h2>
The first step in any big endeavor like this is to plan just how large the facilities need to be.  To do this, engineers and planners first have to estimate the amount of wastewater that will enter the sewer collection system and wastewater treatment plant. <b>This is a critical step, as we do not want facilities that are either too large or too small.</b>
<br>To figure out how much flow is entering our facility, we will figure out the population of our service area and project it from 1955 to 1980.  As you will soon see, population directly translates to wastewater flows.  We are lucky in that we have recent data from 1950 to 1955 that we can use for Unincorporated Clark County and the other local areas, as well as room and visitor information.<br><b>In Engineering and Planning, please note that daily visitors to an area are considered additional population.</b>


In [None]:
import folium

# Corner coordinates ([lat, lon]) used to place the 1958 aerial photograph.
aerial_bounds = [
    [36.042734, -115.191564],  # south-west corner
    [36.152722, -115.056038],  # north-east corner
]

# Base map, opened zoomed in on Harry Reid International Airport.
# NOTE(review): max_bounds=True is passed without any min/max lat/lon arguments, so the
# pan limits are presumably folium's defaults rather than aerial_bounds — confirm.
m = folium.Map(
    location=[36.0840, -115.1537],
    zoom_start=16,
    width=600,
    height=400,
    max_bounds=True,
)

# Tile layers, added in order: the modern aerial basemap, then an optional label overlay.
for layer_options in (
    dict(
        tiles="https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}",
        attr="Esri, Maxar, Earthstar Geographics, and the GIS User Community",
        name="Esri World Imagery",
        overlay=False,
        control=True,
    ),
    dict(
        tiles="https://services.arcgisonline.com/ArcGIS/rest/services/Reference/World_Boundaries_and_Places/MapServer/tile/{z}/{y}/{x}",
        attr="Esri",
        name="Labels (Boundaries & Places)",
        overlay=True,
        control=True,
    ),
):
    folium.TileLayer(**layer_options).add_to(m)

# Historic photograph stretched over aerial_bounds (no opacity override is passed).
folium.raster_layers.ImageOverlay(
    image="vegas_1958.png",
    bounds=aerial_bounds,
    name="Las Vegas 1958 Aerial",
).add_to(m)

# Layer switcher so the reader can toggle the layers defined above.
folium.LayerControl().add_to(m)

m


In [None]:
# Step 1: Read the 1950-1955 table from CSV; "Year" arrives as an ordinary column.
Population1950to1955 = pd.read_csv('Population Data - 1950 to 1955.csv')

# Step 2: Show the table as loaded, with Year still visible as a column.
display(Population1950to1955)

# Step 3: Index by Year, which is the form the cells that follow start from.
Population1950to1955 = Population1950to1955.set_index('Year')


<h3>Time to Figure out Unincorporated Clark County Population Growth to 1980!</h3>
In graphical form, our data looks like this for just Unincorporated Clark County, without including any visitor information.

In [None]:

# Reshape wide → long for Plotly: one row per (Year, Area) pair.
# reset_index() returns a new frame, so Population1950to1955 itself stays indexed by
# Year throughout; an error anywhere in this cell can no longer leave the shared frame
# stuck with Year as a column.
df_long = Population1950to1955.reset_index().melt(
    id_vars='Year',
    value_vars=['Unincorporated Clark County'],
    var_name='Area', value_name='Population'
)

# The chart title is set once, in update_layout below, where it is also centered.
fig = px.line(
    df_long,
    x='Year', y='Population', color="Area",
    labels={'Population' : "Population"}
)

# Hover text: series name, year, and population with thousands separators.
fig.update_traces(
    hovertemplate=(
        "Area = %{fullData.name}<br>"
        "Year = %{x}<br>"
        "Population = %{y:,}"
        "<extra></extra>"
    )
)

fig.update_layout(
    width=800, height=600,
    yaxis_title="Population",
    # Legend: horizontal, centered, pushed below the plot area.
    legend=dict(
        title=dict(text="", side='top center'),
        orientation="h",
        yanchor="top",
        y=-0.3,
        xanchor="center",
        x=0.5
    ),
    # Title: centered (x: 0=left, 0.5=center, 1=right).
    title=dict(
        text="Population Data for Unincorporated Clark County 1950 to 1955",
        x=0.5,
        xanchor="center"
    )
)

fig.show()

In [None]:
# Running row counter stored as 'Years Since 1950'
# (assumes one row per year, beginning with 1950 — TODO confirm against the CSV).
Population1950to1955['Years Since 1950'] = np.arange(Population1950to1955.shape[0])

Do you see a linear trend to the population growth in Unincorporated Clark County?  It certainly looks that way from the data thus far!  Over 5 years, our population increased from 10,170 to 17,866.  This allows us to come up with a good linear equation to predict future population growth!
<br>Unfortunately now it's time for some boring math... 😞 where we need to start with a linear regression equation... 🤔<br>
$$y=mx+b$$
where $y$ is the predicted population, <br>
$m$ is the slope of our data, <br>
$x$ is the number of years since 1950, <br>
and $b$ is the intercept of 10,170 which is our starting population in 1950 <br><br>
You will be walked through this one but you will be doing the linear regression for daily visitors! <br>
Let us first start with the slope $m$, to get a slope you need rise over run which in this case the rise would be the population growth and the run would be the number of years. <br>
Therefore our rise is $$17,866-10,170=7,696$$ and the run is just 5 since it's five years.  Therefore our slope is $$\frac{7,696}{5}=1,539.2$$  What exactly does this mean?  It means that our population increases by about 1,539 persons per year for Unincorporated Clark County! <br>
Believe it or not, but you now have the entire equation you need to predict the population between now and 1980.  $$y=1,539 \times x + 10,170$$
Now it's time for a quiz.  Please tell us what the estimated population of Unincorporated Clark County would be in the year 1980?


In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

def make_question(question_text, options, correct_answer):
    """Display a multiple-choice quiz question with a "Check Answer" button.

    Parameters
    ----------
    question_text : str
        Question shown as an ``<h3>`` heading. It is inserted into the HTML as-is.
    options : iterable of str
        Answer choices. A blank entry is prepended so that nothing is preselected.
    correct_answer : str
        The choice that counts as correct; must be one of ``options``.

    Raises
    ------
    ValueError
        If ``correct_answer`` is not among ``options`` (the quiz could never be
        answered correctly).

    Returns
    -------
    None
        The widgets are displayed as a side effect.
    """
    # Accept any iterable (list, tuple, ...) and fail fast on a broken answer key.
    choices = list(options)
    if correct_answer not in choices:
        raise ValueError(
            f"correct_answer {correct_answer!r} is not one of the options {choices!r}"
        )

    # Widgets
    dropdown = widgets.Dropdown(
        options=[""] + choices,   # leading "" makes the dropdown start blank
        value="",
        description="Answer:",
        style={'description_width': 'initial'}
    )

    button = widgets.Button(
        description="Check Answer",
        button_style="info"
    )

    output = widgets.Output()

    # Callback (closes over dropdown, output, correct_answer)
    def check_answer(b):
        with output:
            clear_output()  # replace the previous verdict instead of stacking messages
            if dropdown.value == "":
                print("Please select an answer!")
            elif dropdown.value == correct_answer:
                print("✅ Correct!")
            else:
                print(f"❌ Incorrect. The correct answer is {correct_answer}.")

    button.on_click(check_answer)

    # Display
    display(widgets.HTML(f"<h3>{question_text}</h3>"))
    display(dropdown, button, output)

In [None]:
# Quiz: projected 1980 population of Unincorporated Clark County.
make_question(
    question_text="What is the estimated population of Unincorporated Clark County in 1980?",
    options=["10,170", "1,000,000", "56,340", "I Don't Know!"],
    correct_answer="56,340",
)

<h3>Time to Figure out Visitor Growth to 1980!</h3>
You will now repeat the same process that you completed for the Unincorporated Clark County Population with the Visitor Data.  <b>Recall that Engineers and planners consider daily visitors as additional population to a service area. </b>

In [None]:
# Reshape wide → long for Plotly: one row per (Year, Area) pair.
# reset_index() returns a new frame, so Population1950to1955 itself stays indexed by
# Year throughout; an error anywhere in this cell can no longer leave the shared frame
# stuck with Year as a column.
df_long = Population1950to1955.reset_index().melt(
    id_vars='Year',
    value_vars=['Average Daily Visitors'],
    var_name='Area', value_name='Population'
)

# The chart title is set once, in update_layout below, where it is also centered
# (the previous px.line title was misspelled and overridden anyway).
fig = px.line(
    df_long,
    x='Year', y='Population', color="Area",
    labels={'Population' : "Population"}
)

# Hover text: series name, year, and value with thousands separators.
fig.update_traces(
    hovertemplate=(
        "Area = %{fullData.name}<br>"
        "Year = %{x}<br>"
        "Population = %{y:,}"
        "<extra></extra>"
    )
)

fig.update_layout(
    width=800, height=600,
    yaxis_title="Population",
    # Legend: horizontal, centered, pushed below the plot area.
    legend=dict(
        title=dict(text="", side='top center'),
        orientation="h",
        yanchor="top",
        y=-0.3,
        xanchor="center",
        x=0.5
    ),
    # Title: centered (x: 0=left, 0.5=center, 1=right).
    title=dict(
        text="Average Daily Visitor Data for Unincorporated Clark County 1950 to 1955",
        x=0.5,
        xanchor="center"
    )
)

fig.show()

The number of visitors in 1950 is 6,120 and in 1955 it jumps to 9,120.  Now time for three pop quizzes!  Aren't we evil? 😈

In [None]:
# Quiz: intercept b of the visitor regression line.
make_question(
    question_text="What should we set our intercept b as?",
    options=["6,120", "1,000,000,000", "9,120", "I Don't Know!"],
    correct_answer="6,120",
)

In [None]:
# Quiz: slope of the visitor regression line.
make_question(
    question_text="What is the slope of our linear regression equation?",
    options=["1,000,000,000", "9,120", "600", "I Don't Know!"],
    correct_answer="600",
)

In [None]:
# Quiz: projected number of daily visitors in 1980.
make_question(
    question_text="What is the estimated number of daily visitors in 1980?",
    options=["🐶", "24,120", "600", "I Don't Know!"],
    correct_answer="24,120",
)

<h3>Time to Put it all Together!</h3>
It's time to visualize your data in graphical form!

In [None]:
# Expose Year as a regular column; it is set back as the index later in this cell.
Population1950to1955 = Population1950to1955.reset_index()
def linear_equation(year, intercept, slope, base_year=1950):
    """Evaluate the straight-line growth model y = intercept + slope * (year - base_year).

    Parameters
    ----------
    year : number or array-like supporting arithmetic
        Calendar year(s) to evaluate the line at.
    intercept : number
        Value of the line at ``base_year``.
    slope : number
        Change per year.
    base_year : number, optional
        Year treated as x = 0. Defaults to 1950, the first year of the data
        used in this notebook.

    Returns
    -------
    Same kind as ``year``
        The predicted value(s).
    """
    # Shift the calendar year so that base_year corresponds to x = 0.
    return intercept + slope * (year - base_year)

# Projection horizon: 1956 through 1980 inclusive.
years = np.arange(1956, 1981)

# Straight-line projections using the intercepts and slopes worked out in the narrative.
predicted_1 = [linear_equation(yr, intercept=10170, slope=1539) for yr in years]  # residents
predicted_2 = [linear_equation(yr, intercept=6120, slope=600) for yr in years]    # daily visitors

# Projected values, indexed by Year so they line up with the observed table.
predicted_df = pd.DataFrame(
    {
        "Unincorporated Clark County": predicted_1,
        "Average Daily Visitors": predicted_2,
    },
    index=pd.Index(years, name="Year"),
)

# Put Year back as the index of the observed table, then stack observed + projected rows.
Population1950to1955 = Population1950to1955.set_index('Year')
Population1950to1980_Predicted = pd.concat([Population1950to1955, predicted_df])

# Service population = daily visitors + residents.
Population1950to1980_Predicted['Total Predicted Unincorporated Clark County'] = (
    Population1950to1980_Predicted['Average Daily Visitors']
    .add(Population1950to1980_Predicted['Unincorporated Clark County'])
)

In [None]:
# Reshape wide → long for Plotly: one row per (Year, Area) pair.
# reset_index() returns a new frame, so Population1950to1980_Predicted itself stays
# indexed by Year throughout; an error in this cell can no longer leave it half-converted.
df_long = Population1950to1980_Predicted.reset_index().melt(
    id_vars='Year',
    value_vars=['Unincorporated Clark County','Average Daily Visitors'],
    var_name='Area', value_name='Population'
)

# The chart title is set once, in update_layout below, where it is also centered.
fig = px.line(
    df_long,
    x='Year', y='Population', color="Area",
    labels={'Population' : "Population"}
)

# Hover text: series name, year, and value with thousands separators.
fig.update_traces(
    hovertemplate=(
        "Area = %{fullData.name}<br>"
        "Year = %{x}<br>"
        "Population = %{y:,}"
        "<extra></extra>"
    )
)

fig.update_layout(
    width=1200, height=600,
    yaxis_title="Population",
    # Legend: horizontal, centered, pushed below the plot area.
    legend=dict(
        title=dict(text="", side='top center'),
        orientation="h",
        yanchor="top",
        y=-0.3,
        xanchor="center",
        x=0.5
    ),
    # Title: centered (x: 0=left, 0.5=center, 1=right). Spelling of "Visitors" corrected.
    title=dict(
        text="Predicted Population and Average Daily Visitors for Unincorporated Clark County 1950 to 1980",
        x=0.5,
        xanchor="center"
    )
)

fig.show()

# Refill the row counter over the whole 1950-1980 table; the frame remains indexed by Year.
Population1950to1980_Predicted['Years Since 1950'] = np.arange(len(Population1950to1980_Predicted))

Now for the real question, since we live in modern times: how does this compare to the actual data?  Thankfully we have that data!
<h3>A Comparison to the Real Data!</h3>

In [None]:
# Step 1: Load the observed 1950-1980 data. The file has an .xlsx extension, so it is
# read as an Excel workbook first; pd.read_csv cannot parse a real workbook.
# NOTE(review): if the file is really CSV text saved under an .xlsx name, recent pandas
# raises ValueError from read_excel and the original read_csv call is used instead —
# confirm the true format, then rename the file or drop the fallback.
actual_data_path = 'Population Data - 1950 to 1980.xlsx'
try:
    Population1950to1980 = pd.read_excel(actual_data_path)
except ValueError:
    Population1950to1980 = pd.read_csv(actual_data_path)

# Step 2: Prefix the linear-model columns with "Predicted" so they cannot be confused
# with the observed series once both tables are merged.
Population1950to1980_Predicted = Population1950to1980_Predicted.rename(columns={
    "Unincorporated Clark County": "Predicted Unincorporated Clark County",
    'Average Daily Visitors': 'Predicted Average Daily Visitors',
})

In [None]:
# Join predicted and observed tables on Year.
Population1950to1980_Merged = pd.merge(
    Population1950to1980_Predicted.reset_index(),
    Population1950to1980.reset_index(),
    on="Year",
    how="outer"   # keep every year from either table; 'inner' would keep only common years
)

# Reshape wide → long for Plotly: one row per (Year, Area) pair.
df_long = Population1950to1980_Merged.melt(
    id_vars='Year',
    value_vars=[
        'Predicted Unincorporated Clark County',
        'Actual Unincorporated Clark County',
        'Predicted Average Daily Visitors',
        'Actual Average Daily Visitors',
    ],
    var_name='Area', value_name='Population'
)

# The chart title is set once, in update_layout below, where it is also centered.
fig = px.line(
    df_long,
    x='Year', y='Population', color="Area",
    labels={'Population' : "Population"}
)

# Hover text: series name, year, and value with thousands separators.
fig.update_traces(
    hovertemplate=(
        "Area = %{fullData.name}<br>"
        "Year = %{x}<br>"
        "Population = %{y:,}"
        "<extra></extra>"
    )
)

fig.update_layout(
    width=1200, height=600,
    yaxis_title="Population",
    # Legend: horizontal, centered, pushed below the plot area.
    legend=dict(
        title=dict(text="", side='top center'),
        orientation="h",
        yanchor="top",
        y=-0.3,
        xanchor="center",
        x=0.5
    ),
    # Title: centered (x: 0=left, 0.5=center, 1=right). Spelling of "Visitors" corrected.
    title=dict(
        text="Predicted and Actual Population and Average Daily Visitors for Unincorporated Clark County 1950 to 1980",
        x=0.5,
        xanchor="center"
    )
)

fig.show()


In the figure above try comparing the total populations for each category, in particular the values for the daily visitors and Unincorporated Clark County Population... <br>
As you can see the daily visitors are underestimated by as much as 33% and the Unincorporated Clark County Population was underestimated by a factor of four!  <br>
What happened?  One of the biggest challenges of planning for any type of utility or improvement related to population is that what was linear at first can change with time or simply increase more per year than we had initially predicted.  <b>This is normal, and this is why our estimates always need to be verified and updated when new data arises to make sure our facilities are adequately sized! Our original assumptions could very well have held had our area not experienced explosive growth.</b>
<br><br>Thankfully we have modern tools and much more access to data, so it is easier to update these estimates.  We also now have access to modern computers to assist us in this process!

In [None]:
import plotly.graph_objects as go

# If 'Year' is the index, bring it back as a column first.
df = Population1950to1955.reset_index()

col_to_fit = "Average Daily Visitors"   # <-- change this if needed

# 1) Use only 1950–1955 data, and only strictly positive values so the log is defined.
fit_slice = df[(df["Year"] >= 1950) & (df["Year"] <= 1955)][["Year", col_to_fit]].dropna()
# This filter used to be assigned to a misspelled name, so it never took effect.
fit_slice = fit_slice[fit_slice[col_to_fit] > 0]

# 2) Fit ln(y) = ln(a) + b*(x - x0) to improve numerical stability (x0 = first year in slice).
#    Floats are used (as in the sibling population fit) so fractional values are not truncated.
x0 = int(fit_slice["Year"].min())
X_fit = (fit_slice["Year"] - x0).to_numpy(dtype=float)
y_fit = fit_slice[col_to_fit].to_numpy(dtype=float)

slope, intercept = np.polyfit(X_fit, np.log(y_fit), 1)
a = np.exp(intercept)
b = slope  # note: model is y = a * exp(b * (x - x0))

# 3) Build smooth curve over 1950–1955 (or extend further if you want)
years_curve = np.arange(1950, 1955 + 0.1, 0.1)  # smooth line within the fit window
y_curve = a * np.exp(b * (years_curve - x0))

# 4) Evaluate the fitted curve at every year of the prediction table (index = Year) and
#    store it as whole persons; astype("int64") drops the fractional part.
Population1950to1980_Predicted['Exponential Predicted Average Daily Visitors'] = (
    a * np.exp(b * (Population1950to1980_Predicted.index.to_numpy(dtype=int) - x0))
).astype("int64")

In [None]:
# Work from a copy of the 1950–1955 table in which Year is an ordinary column.
df = Population1950to1955.reset_index()

col_to_fit = "Unincorporated Clark County"   # column being modelled

# 1) Fitting sample: years 1950–1955, no missing values, strictly positive
#    (positivity is required because the fit is done on the logarithm).
fit_slice = df.loc[df["Year"].between(1950, 1955), ["Year", col_to_fit]].dropna()
fit_slice = fit_slice.loc[fit_slice[col_to_fit] > 0]

# 2) Straight-line fit of ln(y) against years elapsed since the first year in the sample;
#    measuring x from x0 keeps the regressor small.
x0 = int(fit_slice["Year"].min())
X_fit = fit_slice["Year"].sub(x0).to_numpy(dtype=float)
y_fit = fit_slice[col_to_fit].to_numpy(dtype=float)

slope, intercept = np.polyfit(X_fit, np.log(y_fit), deg=1)
b = slope                # growth rate in y = a * exp(b * (x - x0))
a = np.exp(intercept)    # fitted value at x = x0

# 3) Finely sampled curve across the fitting window.
years_curve = np.arange(1950, 1955 + 0.1, 0.1)
y_curve = a * np.exp(b * (years_curve - x0))

# 4) Evaluate the model at every year of the prediction table (index = Year) and store
#    the result as int64.
Population1950to1980_Predicted['Exponential Predicted Unincorporated Clark County'] = np.multiply(
    a,
    np.exp(b * (Population1950to1980_Predicted.index.to_numpy(dtype=int) - x0)),
).astype("int64")


Here is a graph showing an exponential comparison to the data.

In [None]:
# Join predicted (now including the exponential columns) and observed tables on Year.
Population1950to1980_Merged = pd.merge(
    Population1950to1980_Predicted.reset_index(),
    Population1950to1980.reset_index(),
    on="Year",
    how="outer"   # keep every year from either table; 'inner' would keep only common years
)

# Reshape wide → long for Plotly: one row per (Year, Area) pair.
df_long = Population1950to1980_Merged.melt(
    id_vars='Year',
    value_vars=[
        'Exponential Predicted Unincorporated Clark County',
        'Actual Unincorporated Clark County',
        'Exponential Predicted Average Daily Visitors',
        'Actual Average Daily Visitors',
    ],
    var_name='Area', value_name='Population'
)

# The chart title is set once, in update_layout below, where it is also centered.
fig = px.line(
    df_long,
    x='Year', y='Population', color="Area",
    labels={'Population' : "Population"}
)

# Hover text: series name, year, and value with thousands separators.
fig.update_traces(
    hovertemplate=(
        "Area = %{fullData.name}<br>"
        "Year = %{x}<br>"
        "Population = %{y:,}"
        "<extra></extra>"
    )
)

fig.update_layout(
    width=1200, height=600,
    yaxis_title="Population",
    # Legend: horizontal, centered, pushed below the plot area.
    legend=dict(
        title=dict(text="", side='top center'),
        orientation="h",
        yanchor="top",
        y=-0.3,
        xanchor="center",
        x=0.5
    ),
    # Title: centered (x: 0=left, 0.5=center, 1=right). Spelling of "Visitors" corrected.
    title=dict(
        text="Predicted and Actual Population and Average Daily Visitors for Unincorporated Clark County 1950 to 1980",
        x=0.5,
        xanchor="center"
    )
)

fig.show()

This predicts the overall trend much better between 1955 and 1975 for the Unincorporated Clark County data as it predicts 174,590 versus 158,037 observed which is within 10% and is considered a good prediction.  After 1975, however, it falls apart and grossly overpredicts population.  For the visitor volumes, however, it quickly deviates beyond 1960 and overestimates the data.
<br><br><b>The final graph below shows the effects of planners and engineers periodically updating their estimates.  The final results are much better, but obviously not perfect. </b>

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS

def rolling_linreg_end_pred(df, ycol, window):
    """Rolling-window linear regression of ``df[ycol]`` on ``df["Year"]``.

    Parameters
    ----------
    df : DataFrame
        Must contain a "Year" column and the column named by ``ycol``.
    ycol : str
        Name of the target column. Values are coerced with ``pd.to_numeric``;
        anything unparseable becomes NaN and is left out of the fit.
    window : int
        Window length passed to ``RollingOLS``.

    Returns
    -------
    pd.Series
        Float predictions aligned to ``df.index``. Rows whose target is not
        finite stay NaN, and the whole series is NaN when fewer than ``window``
        usable rows exist. Presumably the first ``window - 1`` usable rows are
        NaN as well, since no full window exists there — TODO confirm.
    """
    # Design matrix and target
    X_full = sm.add_constant(df["Year"].to_numpy(dtype=float))
    y_full = pd.to_numeric(df[ycol], errors="coerce").to_numpy(dtype=float)

    # Keep only rows with finite y (and finite X)
    mask = np.isfinite(y_full) & np.isfinite(X_full).all(axis=1)
    X = X_full[mask]
    y = y_full[mask]

    # Output buffer covering every row of df; rows never fitted remain NaN.
    pred_full = np.full(df.shape[0], np.nan, dtype=float)
    if X.shape[0] < window:
        # Not enough usable rows for a single window: return all-NaN.
        return pd.Series(pred_full, index=df.index)

    res = RollingOLS(y, X, window=window).fit()

    # 1) Prefer fittedvalues if available (end-of-window fit)
    # NOTE(review): nothing in this file shows that the rolling results object exposes
    # `fittedvalues` (or `resid`, used in the last-resort branch below); if it does not,
    # the params branch is the one that actually runs — confirm against statsmodels docs.
    pred_subset = getattr(res, "fittedvalues", None)

    if pred_subset is None:
        # 2) Compute from rolling params (intercept + slope * x_t)
        params = res.params
        if isinstance(params, pd.DataFrame):
            # Labelled params: look columns up by name, falling back to position.
            b0 = (params["const"] if "const" in params else params.iloc[:, 0]).to_numpy()
            b1 = (params["Year"]  if "Year"  in params else params.iloc[:, 1]).to_numpy()
        else:
            P = np.asarray(params)
            # Try shapes (n_obs, k) or (k, n_obs)
            if P.ndim == 2 and P.shape[1] == X.shape[1]:
                b0, b1 = P[:, 0], P[:, 1]
            elif P.ndim == 2 and P.shape[0] == X.shape[1]:
                b0, b1 = P[0, :], P[1, :]
            else:
                # last resort: y - resid
                pred_subset = y - res.resid

        # b0/b1 are only defined when the last-resort branch was not taken.
        if pred_subset is None:
            pred_subset = b0 + b1 * X[:, 1]

    # Re-embed into full index
    pred_full[np.where(mask)[0]] = np.asarray(pred_subset).reshape(-1)
    return pd.Series(pred_full, index=df.index)


# ---- rolling window length, in years ----
w = 5  # use 2, 3, ... for a shorter window

# ---- observed columns and the names of their rolling-regression counterparts ----
col_uninc = 'Actual Unincorporated Clark County'
col_visit = 'Actual Average Daily Visitors'

rlr_name_uninc = f'Unincorporated Clark County (Rolling LinReg {w}-yr)'
rlr_name_visit = f'Average Daily Visitors (Rolling LinReg {w}-yr)'

# ---- fit each observed series and store the result rounded to nullable integers ----
for actual_col, rolling_col in ((col_uninc, rlr_name_uninc), (col_visit, rlr_name_visit)):
    Population1950to1980_Merged[rolling_col] = rolling_linreg_end_pred(
        Population1950to1980_Merged, actual_col, window=w
    )
    Population1950to1980_Merged[rolling_col] = (
        Population1950to1980_Merged[rolling_col].round().astype("Int64")
    )

# ---- long format for Plotly: one row per (Year, Area) pair ----
df_long = Population1950to1980_Merged.melt(
    id_vars='Year',
    value_vars=[col_uninc, col_visit, rlr_name_uninc, rlr_name_visit],
    var_name='Area',
    value_name='Population',
)

# ---- line chart of observed vs rolling-regression series ----
fig = px.line(
    df_long,
    x='Year',
    y='Population',
    color="Area",
    title="Actual vs Rolling Linear Regression (Unincorporated Clark County & Average Daily Visitors, 1950–1980)",
    labels={'Population': "Population"},
)

fig.update_traces(
    hovertemplate=(
        "Area = %{fullData.name}<br>"
        "Year = %{x}<br>"
        "Population = %{y:,}"
        "<extra></extra>"
    )
)

fig.update_layout(
    width=1200,
    height=600,
    yaxis_title="Population",
    # Legend: horizontal, centered, below the plot area.
    legend=dict(
        title=dict(text="", side='top center'),
        orientation="h",
        yanchor="top",
        y=-0.3,
        xanchor="center",
        x=0.5,
    ),
    # Centered title (replaces the longer one given to px.line).
    title=dict(
        text="Actual vs Rolling Linear Regression (1950–1980)",
        x=0.5,
        xanchor="center",
    ),
)

# Draw the rolling-regression traces dashed and thicker than the observed ones.
for rolling_trace_name in (rlr_name_uninc, rlr_name_visit):
    fig.update_traces(selector=dict(name=rolling_trace_name), line=dict(dash="dash", width=3))

fig.show()
