In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [None]:
# Hourly per-hexagon exports for the two cities ("DD" and "FB"); both file
# names carry the same timestamp suffix. Plain string literals are used because
# nothing is interpolated into the paths.
filename_DD = "../data/df_DD_2025-04-15_17-01-11.csv"
filename_FB = "../data/df_FB_2025-04-15_17-01-11.csv"

# `datetime_hour` is parsed to datetime64 so that `.dt` accessors work later on.
df_DD = pd.read_csv(filename_DD, parse_dates=["datetime_hour"])
df_FB = pd.read_csv(filename_FB, parse_dates=["datetime_hour"])


In [None]:
# Fraction of all rentals in df_DD that belong to hexagon 871f1b559ffffff.
# Selecting the column as a one-element list keeps the result a labelled Series.
(
    df_DD.loc[df_DD["hex_id"] == "871f1b559ffffff", ["rent_count"]].sum()
    / df_DD["rent_count"].sum()
)

In [None]:
# Fraction of all returns in df_DD that belong to hexagon 871f1b559ffffff.
# Selecting the column as a one-element list keeps the result a labelled Series.
(
    df_DD.loc[df_DD["hex_id"] == "871f1b559ffffff", ["return_count"]].sum()
    / df_DD["return_count"].sum()
)

In [None]:
# Absolute number of returns recorded for hexagon 871f1b559ffffff.
df_DD.loc[df_DD["hex_id"] == "871f1b559ffffff", ["return_count"]].sum()

In [None]:
# Ten hexagons with the most rentals, each expressed as a share of ALL rentals.
# The denominator is the rental total (not the return total) so that numerator
# and denominator measure the same quantity, matching the single-hexagon
# rent-share cell above.
(
    df_DD.groupby("hex_id")
    .agg({"rent_count": "sum"})
    .sort_values("rent_count", ascending=False)
    .head(10)
    / df_DD["rent_count"].sum()
)

## DataFrame transformations: hourly records to daily totals

In [None]:
def transform(df_input: pd.DataFrame, city: str) -> pd.DataFrame:
    """Collapse an hourly frame into one row per calendar day.

    The input is left untouched. Columns starting with ``"weekday"`` as well as
    ``return_count``, ``event_count_end`` and ``hex_id`` are discarded (if
    present), ``rent_count`` / ``event_count_start`` are renamed to
    ``"Bike trips"`` / ``"Public events"``, and the remaining columns are
    aggregated per day. The result is then reindexed onto a continuous daily
    calendar so that days without data show up as NaN rows.

    Parameters
    ----------
    df_input : pd.DataFrame
        Must contain a datetime64 column ``datetime_hour`` plus ``rent_count``,
        ``Temperature``, ``Humidity``, ``Precipitation``, ``Wind``,
        ``is_dayoff`` and ``event_count_start``.
    city : str
        ``"DD"`` removes May-August of every year from the calendar;
        ``"FB"`` removes September-December 2023 and January-August 2024.
        Any other value leaves the calendar unfiltered.
        NOTE(review): the excluded months are presumably periods without
        usable data for that city - confirm.

    Returns
    -------
    pd.DataFrame
        Columns ``date`` (datetime64), the seven aggregated series, and
        ``date_str`` (``YYYY-MM-DD``), sorted by date.

    Raises
    ------
    IndexError
        If ``df_input`` has no rows (there is no first/last day to span).
    KeyError
        If one of the aggregated columns is missing.
    """
    daily_aggregations = {
        "Bike trips": "sum",
        "Temperature": "mean",
        "Humidity": "mean",
        "Precipitation": "sum",
        "Wind": "mean",
        "is_dayoff": "max",
        "Public events": "sum",
    }

    unused_columns = [col for col in df_input.columns if col.startswith("weekday")]
    unused_columns += ["return_count", "event_count_end", "hex_id"]

    # drop/rename return new frames, so the caller's frame is never mutated.
    df = df_input.drop(columns=unused_columns, errors="ignore").rename(
        columns={
            "datetime_hour": "date",
            "rent_count": "Bike trips",
            "event_count_start": "Public events",
        },
        errors="ignore",
    )

    # Truncate to the calendar day but keep a datetime64 key: the reindex below
    # then matches a DatetimeIndex against a DatetimeIndex instead of relying
    # on implicit promotion of object-dtype `datetime.date` values.
    df["date"] = pd.to_datetime(df["date"].dt.date)

    # groupby sorts by key, so the first/last index entries are min/max day.
    df = df.groupby("date").agg(daily_aggregations)

    whole_range = pd.date_range(start=df.index[0], end=df.index[-1], freq="D")
    if city == "DD":
        whole_range = whole_range[~whole_range.month.isin([5, 6, 7, 8])]
    elif city == "FB":
        excluded = (
            whole_range.month.isin([9, 10, 11, 12]) & (whole_range.year == 2023)
        ) | (
            whole_range.month.isin([1, 2, 3, 4, 5, 6, 7, 8])
            & (whole_range.year == 2024)
        )
        whole_range = whole_range[~excluded]

    # Days inside the calendar without observations become all-NaN rows; days
    # outside the calendar are dropped. The calendar is already ascending, so
    # no further sorting is needed.
    df = df.reindex(whole_range).rename_axis("date").reset_index()
    df["date_str"] = df["date"].dt.strftime("%Y-%m-%d")
    return df
    

In [None]:
# Rebinds df_DD to the daily frame. The hourly frame (with hex_id and
# return_count) is no longer reachable afterwards, so the exploratory cells
# above only work before this cell has run.
df_DD = transform(df_input=df_DD, city="DD")

In [None]:
# Rebinds df_FB to the daily frame; the hourly frame is no longer reachable
# under this name afterwards.
df_FB = transform(df_input=df_FB, city="FB")

## plot settings

In [None]:
# Series plotted against bike trips, one subplot row each, in this order.
secondary_factors = [
    "Temperature",
    "Humidity",
    "Precipitation",
    "Wind",
    "Public events",
]

# Font size used for axis titles, tick labels and the figure's base font.
smaller_fontsize = 11

In [None]:
def _city_ranges(bike_trips_max, public_events_max):
    """Return fresh [min, max] y-axis limits for every plotted series of one city.

    Only the upper limits for bike trips and public events differ between the
    cities; the weather ranges are the same for both.
    """
    return {
        "Bike trips": [0, bike_trips_max],
        "Temperature": [-10, 30],
        "Humidity": [0, 100],
        "Precipitation": [0, 200],
        "Wind": [0, 20],
        "Public events": [0, public_events_max],
    }


# Fixed y-axis limits, keyed first by city code and then by series name.
y_ranges = {
    "DD": _city_ranges(bike_trips_max=15000, public_events_max=350),
    "FB": _city_ranges(bike_trips_max=6000, public_events_max=250),
}

In [None]:
# Line colour for each plotted series, keyed by the series (column) name.
color_dict = dict(
    [
        ("Bike trips", "steelblue"),
        ("Temperature", "firebrick"),
        ("Humidity", "darkorange"),
        ("Precipitation", "mediumseagreen"),
        ("Wind", "purple"),
        ("Public events", "blueviolet"),
    ]
)

In [None]:
# Daily frame per city code; insertion order decides the subplot column
# (first key -> left column, second key -> right column).
df_helper = dict(DD=df_DD, FB=df_FB)

## plot

In [None]:
# Grid of line plots: one row per secondary factor, one column per city.
# Every panel shows daily bike trips on the left y-axis and the factor on a
# secondary (right) y-axis.

# NOTE(review): machine-specific absolute path - the export only works where
# this directory exists. Consider a path relative to the repository.
output_path = "/Users/v.sinichenko/Downloads/images/All_daily_lineplots.png"

n_rows = len(secondary_factors)
n_cols = len(df_helper)

# Every cell of the grid needs its own spec enabling the secondary y-axis.
spec_value = {"secondary_y": True}
my_specs = [[dict(spec_value) for _ in range(n_cols)] for _ in range(n_rows)]

fig = make_subplots(
    rows=n_rows,
    cols=n_cols,
    shared_xaxes=True,
    vertical_spacing=0.02,
    horizontal_spacing=0.15,
    specs=my_specs,
)

# Shared font styling for all y-axis titles and tick labels.
axis_fonts = dict(
    title_font=dict(size=smaller_fontsize),
    tickfont=dict(size=smaller_fontsize),
)

ax1_name = "Bike trips"
for col, city in enumerate(df_helper, start=1):
    city_df = df_helper[city]

    for row, ax2_name in enumerate(secondary_factors, start=1):
        # connectgaps=False keeps NaN days as visible breaks in the lines.
        # The target axes are assigned by add_trace via row/col/secondary_y,
        # so the traces themselves do not name an axis.
        fig.add_trace(
            go.Scatter(
                x=city_df["date_str"],
                y=city_df[ax1_name],
                name=ax1_name,
                line=dict(color=color_dict[ax1_name], width=1.5),
                connectgaps=False,
            ),
            row=row,
            col=col,
            secondary_y=False,
        )
        fig.add_trace(
            go.Scatter(
                x=city_df["date_str"],
                y=city_df[ax2_name],
                name=ax2_name,
                line=dict(color=color_dict[ax2_name], width=1.5),
                connectgaps=False,
            ),
            row=row,
            col=col,
            secondary_y=True,
        )

        fig.update_yaxes(
            title_text=ax1_name,
            row=row,
            col=col,
            secondary_y=False,
            range=y_ranges[city][ax1_name],
            **axis_fonts,
        )
        fig.update_yaxes(
            title_text=ax2_name,
            row=row,
            col=col,
            secondary_y=True,
            range=y_ranges[city][ax2_name],
            **axis_fonts,
        )

    # Only the bottom panel of each column gets an x-axis title.
    fig.update_xaxes(
        title_text="Date",
        row=n_rows,
        col=col,
        title_font=dict(size=smaller_fontsize),
        tickfont=dict(size=9),
    )

# Settings applied to every axis. The x-axes are categorical because the x
# values are the preformatted date strings; every third tick gets a label.
fig.update_yaxes(showgrid=False)
fig.update_xaxes(type="category", showgrid=False, tickangle=-90, ticklabelstep=3)

# Column headings, positioned by hand in paper coordinates (adjust x if the
# subplot spacing changes). The order follows the keys of df_helper.
column_titles = [("Dresden", 0.17), ("Freiburg", 0.8)]

fig.update_layout(
    height=210 * n_rows,
    width=1150,
    template="plotly_white",
    showlegend=False,
    font=dict(size=smaller_fontsize),
    annotations=[
        dict(
            text=title,
            x=x_pos,
            xref="paper",
            y=1.05,
            yref="paper",
            showarrow=False,
            font=dict(size=16, family="Arial"),
            align="center",
        )
        for title, x_pos in column_titles
    ],
)

fig.show()

fig.write_image(output_path)

## good plots were used for training

In [None]:
# ## issue: you cannot prevent sns from connecting the lines
# import seaborn as sns
# sns.set_style("whitegrid") 
# fix, ax1 = plt.subplots(figsize=(10, 6))
# ax1_name = "Bike trips"
# ax2_name = "Temperature"

# sns.lineplot(data=df, x="date", y=ax1_name, ax=ax1, color="steelblue", label=ax1_name)
# ax2 = ax1.twinx()
# sns.lineplot(data=df, x="date", y="Temperature", ax=ax2, color="firebrick", label=ax2_name)
# ax1.set_ylabel(ax1_name)
# ax2.set_ylabel(ax2_name)
# plt.legend()
# plt.show()

In [None]:
# # good plot
# fig = go.Figure()
# ax1_name = "Bike trips"
# ax2_name = "Temperature"

# fig.add_trace(go.Scatter( x=df["date_str"], y=df[ax1_name], name=ax1_name, line=dict(color="steelblue"), connectgaps=False, ))

# fig.add_trace(go.Scatter( x=df["date_str"], y=df[ax2_name], name=ax2_name, yaxis="y2",line=dict(color=color_dict[ax2_name]),connectgaps=False, ))

# # Layout with two y-axes
# fig.update_layout( yaxis=dict(title=ax1_name, range=[0,None]),
#     yaxis2=dict(title=ax2_name, overlaying="y", side="right" ),
#     legend=dict(x=0.5, y=1.1, orientation="h", xanchor="center"),
#     width=800, height=600, template="plotly_white", xaxis_type='category' )

# fig.show()
