# In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from threading import Timer
import influxdb_client, os, time
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.write_api import SYNCHRONOUS
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
from plotly.subplots import make_subplots
import pyarrow as pa
import pyarrow.parquet as pq
from scipy import stats
import statistics 
from datetime import timedelta
import logging
import matplotlib.dates as mdates



def read_token(file_path):
    """Return the text stored in *file_path* with surrounding whitespace removed."""
    with open(file_path, 'r') as handle:
        raw_text = handle.read()
    return raw_text.strip()
# --- InfluxDB connection -------------------------------------------------
# The API token is read from a local text file rather than written in the
# source.
token = read_token('token_read.txt')
# NOTE(review): these look like placeholder values — replace with the real
# organisation name and server URL before running.
org = "your org"
url = 'your url'
# Despite the name, this client is only used to obtain a query API further
# down in this file.
write_client = influxdb_client.InfluxDBClient(url=url, token=token, org=org)

# --- Run configuration ---------------------------------------------------
# Look-back window in days; interpolated into the Flux `range(start: -<days>d)`.
days = '77'
# True: load the data from 'df_prod.parquet'. False: query InfluxDB.
from_parquet = False
# When querying InfluxDB, True also writes the result to 'df_prod.parquet'.
export = True



logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Default analysis window (UTC). These are bound as the default arguments of
# preprocess_subset at definition time.
# NOTE(review): the window starts at 00:06:00, not midnight — confirm this is
# intentional.
start_time_filter = pd.Timestamp('2025-01-01 00:06:00', tz='UTC')
end_time_filter = pd.Timestamp('2025-03-15 23:59:59', tz='UTC')


def preprocess_subset(df,
                      timecol='_time',
                      start_time_filter=start_time_filter,
                      end_time_filter=end_time_filter,
                      exclude_ids=('tvbox-tx2-07', 'tvbox-e10-01', 'tvbox-e10-02', 'tvbox-e10-03'),
                      device='tx2'):
    """Filter a measurements frame by time and device id and add derived columns.

    The input frame is left untouched; a new DataFrame is returned.

    Args:
        df: DataFrame containing at least ``timecol`` and a ``'pi-id'`` column.
        timecol: Name of the timestamp column. Numeric values are interpreted
            as nanoseconds since the epoch; the result is UTC-aware.
        start_time_filter: Inclusive lower bound of the kept time window.
        end_time_filter: Inclusive upper bound of the kept time window.
        exclude_ids: Iterable of ``'pi-id'`` values to drop, or ``None`` to
            keep every id.
        device: ``'tx2'`` labels ids containing ``'tx2'`` as ``'TX2'``;
            ``'e10'`` labels ids containing ``'btv'`` as ``'E10'``; any other
            value labels every row ``'other'``.

    Returns:
        A new DataFrame holding only the rows inside the window (and not
        excluded), with ``timecol`` converted to datetime and these columns
        added: ``within_date_range`` (always True for the kept rows),
        ``device_type``, ``model`` (``'YOLOv8n'`` when the id ends with
        ``'_n'``, otherwise ``'YOLOv10n'``) and ``date``.
    """
    logger.info("Original DataFrame shape: %s", df.shape)

    # Parse into a separate Series so the caller's frame is never written to.
    timestamps = pd.to_datetime(df[timecol], unit='ns', utc=True)
    logger.info("After datetime conversion, DataFrame shape: %s", df.shape)

    # Evaluate the window test once and reuse it for the flag and the filter.
    in_range = timestamps.between(start_time_filter, end_time_filter)
    keep = in_range
    if exclude_ids is not None:
        keep = in_range & ~df['pi-id'].isin(list(exclude_ids))

    # A single explicit copy of the surviving rows: the column assignments
    # below then operate on an independent frame instead of a slice of the
    # input (which is what triggers SettingWithCopyWarning).
    df = df.loc[keep].copy()
    df[timecol] = timestamps.loc[keep]
    df['within_date_range'] = in_range.loc[keep]

    logger.info("After ID filtering, DataFrame shape: %s", df.shape)

    # device -> (substring looked for in 'pi-id', label assigned on a match).
    # NOTE(review): E10 devices are recognised by the substring 'btv' —
    # confirm this matches the deployed ids.
    device_markers = {'tx2': ('tx2', 'TX2'), 'e10': ('btv', 'E10')}
    marker = device_markers.get(device)
    if marker is None:
        df['device_type'] = 'other'
    else:
        needle, label = marker
        df['device_type'] = df['pi-id'].apply(
            lambda pid: label if needle in pid else 'other')

    df['model'] = df['pi-id'].apply(
        lambda pid: 'YOLOv8n' if pid.endswith('_n') else 'YOLOv10n')
    # timecol is already datetime here, so no second parsing pass is needed.
    df['date'] = df[timecol].dt.date

    logger.info("Final DataFrame shape: %s", df.shape)

    return df


# Load the raw measurements: from the local Parquet file when `from_parquet`
# is set, otherwise from InfluxDB (optionally writing the Parquet file).
if from_parquet:
    df_prod = pd.read_parquet('df_prod.parquet')
else:
    query_api = write_client.query_api()
    query = f"""from(bucket: "ic2_parking")
    |> range(start: -{days}d)
    """
    # Query the organisation the client was configured with, instead of a
    # second hard-coded name that can drift from it.
    df_prod = query_api.query_data_frame(query, org=org)
    # query_data_frame returns a list of DataFrames when the result tables do
    # not share one schema; merge them so the code below always sees a single
    # frame.
    if isinstance(df_prod, list):
        df_prod = pd.concat(df_prod, ignore_index=True)
    if export:
        table = pa.Table.from_pandas(df_prod, preserve_index=True)
        pq.write_table(table, 'df_prod.parquet')

# Notebook-style display of the raw frame.
df_prod

# Keep only the analysis window / non-excluded devices and add derived columns.
df_prod = preprocess_subset(df_prod, device='e10')
df_prod

# df_prod is expected to hold the preprocessed measurements.
# Coerce the plotted columns: timestamps as datetimes, counts as plain ints.
df_prod['_time'] = pd.to_datetime(df_prod['_time'])
df_prod['_value'] = df_prod['_value'].astype(int)

# Draw the full series with the pyplot state-machine API.
plt.figure(figsize=(12, 6))
plt.plot(
    df_prod['_time'],
    df_prod['_value'],
    color='b',
    linestyle='-',
    marker='o',
)
plt.title('Time Series of Detected Cars')
plt.xlabel('Timestamp')
plt.ylabel('Value')
plt.grid(True)
plt.xticks(rotation=45)

# One y tick per integer between the smallest and largest observed value.
value_ticks = range(df_prod['_value'].min(), df_prod['_value'].max() + 1)
plt.yticks(value_ticks)

# tight_layout keeps the rotated tick labels from being cut off.
plt.tight_layout()
plt.show()


# Coerce the plotted columns: timestamps as datetimes, counts as plain ints.
df_prod['_time'] = pd.to_datetime(df_prod['_time'])
df_prod['_value'] = df_prod['_value'].astype(int)

# Holidays: March 1 to March 5, 2025 (one entry per day).
holidays = pd.date_range(start='2025-03-01', end='2025-03-05', tz='UTC')
# Plain date objects in a set, built once: constant-time membership tests in
# the loop below instead of rebuilding and scanning an array per sample.
holiday_dates = set(holidays.date)

# Use a simpler style to avoid grid issues
plt.style.use('seaborn-v0_8-paper')

main_line_color = "black"    # main data line
weekend_color = "blue"       # weekend shading
holiday_color = "#2b8c74"    # holiday shading
highlight_color = "#6a3d9a"  # Feb 24 marker line

# Create a wide figure
fig, ax = plt.subplots(figsize=(12, 6))

# Plot only every `sample_step`-th row; the same down-sampled timestamps
# drive both the line and the shaded spans.
sample_step = 300
time_sliced = df_prod['_time'][::sample_step]
value_sliced = df_prod['_value'][::sample_step]

# Plot main data line
ax.plot(time_sliced, value_sliced, linestyle='-',
        color=main_line_color, alpha=0.7, linewidth=2, label="Parking Spots")

# Highlight weekends & holidays
holiday_label_added = False  # Ensure "Holidays" label appears only once
weekend_label_added = False  # Ensure "Weekends" label appears only once

n_samples = len(time_sliced)
for i, timestamp in enumerate(time_sliced):
    # Each span runs from this sample to the next one; the last sample has no
    # successor, so its span has zero width.
    next_timestamp = time_sliced.iloc[i + 1] if i + 1 < n_samples else timestamp

    if timestamp.weekday() >= 5:  # Weekends (Saturday = 5, Sunday = 6)
        ax.axvspan(timestamp, next_timestamp, color=weekend_color, alpha=0.3,
                   label="Weekends" if not weekend_label_added else "")
        weekend_label_added = True

    # Weekends take precedence: a holiday falling on a weekend is shaded as a
    # weekend.
    elif timestamp.date() in holiday_dates:  # Holidays (compare only dates)
        ax.axvspan(timestamp, next_timestamp, color=holiday_color, alpha=0.3,
                   label="Holidays" if not holiday_label_added else "")
        holiday_label_added = True

# Highlight February 24 (timezone-aware, like `holidays` above, so naive and
# aware timestamps are not mixed on the same axis).
feb_24 = pd.Timestamp('2025-02-24', tz='UTC')
ax.axvline(feb_24, color=highlight_color, linestyle="--", linewidth=2, label="Classes Returned (Feb 24)")

# Labels and title
ax.set_xlabel('Timestamp', fontsize=18, fontweight='bold')
ax.set_ylabel('Parking Spots', fontsize=18, fontweight='bold')
ax.set_title('Time Series of Parking Spots', fontsize=22, fontweight='bold')

# Formatting x-axis for better readability
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.AutoDateLocator())
plt.xticks(rotation=25, fontsize=14)

# Set y-axis ticks dynamically: step of about one eighth of the value range,
# never below 1.
y_min, y_max = df_prod['_value'].min(), df_prod['_value'].max()
ax.set_yticks(range(y_min, y_max + 1, max(1, (y_max - y_min) // 8)))
ax.tick_params(axis='y', labelsize=14)

# Explicitly disable grid
ax.grid(False)

# Add legend, keeping one handle per distinct label
handles, labels = ax.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
ax.legend(by_label.values(), by_label.keys(), fontsize=12)

# Adjust layout and save before showing
plt.tight_layout()
plt.savefig('cars_influx.pdf')

# Show plot
plt.show()
