In [1]:
import pandas as pd
import sqlite3
from datetime import datetime, timedelta
import calendar

In [2]:
# ------------------------------
# Read the monthly fuel CSV
# ------------------------------
fuel_df = pd.read_csv("./29092025_Latest_DB/csv/tbl_carfule.csv")

# Normalise text (object-dtype) columns only: trim surrounding whitespace and
# lowercase. Numeric columns are left exactly as read.
text_columns = fuel_df.select_dtypes(include="object").columns
for text_col in text_columns:
    trimmed = fuel_df[text_col].str.strip()
    fuel_df[text_col] = trimmed.str.lower()

In [3]:
# ------------------------------
# Helper: parse month-year robustly
# ------------------------------
def parse_month_year(month_str: str) -> datetime:
    """Parse a month-and-year label into a ``datetime`` for the 1st of that month.

    The input is converted with ``str()`` and stripped, then matched against
    every combination of:

    * month name: abbreviated (``%b``) or full (``%B``)
    * separator: hyphen, nothing, or whitespace
    * year: two digits (``%y``) or four digits (``%Y``)

    so ``"Jul-13"``, ``"jul-2013"``, ``"July2013"``, ``"July-2013"``,
    ``"Jul13"`` and ``"Jul 2013"`` all yield ``datetime(2013, 7, 1)``.

    Parameters
    ----------
    month_str : str
        Month-year text. Non-string values are stringified first.

    Returns
    -------
    datetime
        Midnight on the first day of the parsed month (``strptime`` defaults
        the day to 1 when the format carries no day field).

    Raises
    ------
    ValueError
        If none of the supported layouts matches.
    """
    month_str = str(month_str).strip()
    formats = (
        # Layouts accepted historically -- kept first.
        "%b-%y", "%b-%Y", "%B%y", "%B%Y",
        # Remaining month-form / separator combinations.
        "%B-%y", "%B-%Y", "%b%y", "%b%Y",
        "%b %y", "%b %Y", "%B %y", "%B %Y",
    )
    for fmt in formats:
        try:
            return datetime.strptime(month_str, fmt)
        except ValueError:
            # This layout did not match; fall through to the next candidate.
            continue
    raise ValueError(f"Unrecognized date format: {month_str}")

In [4]:
# ------------------------------
# Function to expand month -> daily entries
# ------------------------------
def expand_monthly(row):
    """Spread one monthly fuel total evenly over every day of that month.

    Parameters
    ----------
    row : mapping (e.g. a ``pandas.Series``)
        Must provide ``"FuleDT"`` (month-year text understood by
        ``parse_month_year``), ``"FuleAmt"`` (monthly total, convertible to
        ``float``) and ``"CarID"``.

    Returns
    -------
    list of dict
        One record per calendar day with keys ``vehicle_id``, ``refuel_date``
        (``YYYY-MM-DD``), ``fuel_amount`` (daily share formatted to two
        decimals, as a string), ``created_on`` (that day at midnight) and
        ``updated_on`` (time of this call). Empty when the monthly amount is
        zero or missing.

    Raises
    ------
    ValueError
        If the month label cannot be parsed or the amount is not numeric.
    """
    start_date = parse_month_year(row["FuleDT"])
    days_in_month = calendar.monthrange(start_date.year, start_date.month)[1]

    monthly_amt = row["FuleAmt"]
    # A missing amount would otherwise survive the zero check below and be
    # emitted as the literal text "nan" for every day of the month.
    if pd.isna(monthly_amt):
        return []
    monthly_amt = float(monthly_amt)
    if monthly_amt == 0:
        return []

    daily_amt = f"{monthly_amt / days_in_month:.2f}"
    # Taken once so every row generated for this month shares one timestamp.
    updated_on = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    entries = []
    for day in range(days_in_month):
        date = start_date + timedelta(days=day)
        entries.append({
            "vehicle_id": row["CarID"],
            "refuel_date": date.strftime("%Y-%m-%d"),
            "fuel_amount": daily_amt,
            "created_on": date.strftime("%Y-%m-%d %H:%M:%S"),
            "updated_on": updated_on,
        })
    return entries

In [5]:
# ------------------------------
# Expand all rows
# ------------------------------
all_entries = []
for _, row in fuel_df.iterrows():
    all_entries.extend(expand_monthly(row))

# Pin the column list so the frame keeps its schema even when no row expanded
# (an empty list would otherwise give a column-less frame, and any later
# column-based step such as drop_duplicates(subset=...) would raise KeyError).
final_df = pd.DataFrame(
    all_entries,
    columns=["vehicle_id", "refuel_date", "fuel_amount", "created_on", "updated_on"],
)

In [6]:
# ------------------------------
# Insert into SQLite without dropping id
# ------------------------------
database = "./29092025_Latest_DB/guide-mts-database.sqlite3"

# Keep a single row per (vehicle, day); the first occurrence wins.
final_df_nodup = final_df.drop_duplicates(subset=["vehicle_id", "refuel_date"])

# Build the parameter tuples before opening the connection so the database
# is not held open while pandas iterates.
rows_to_insert = [
    (r.vehicle_id, r.refuel_date, r.fuel_amount, r.created_on, r.updated_on)
    for r in final_df_nodup.itertuples(index=False)
]

conn = sqlite3.connect(database, timeout=30)
try:
    conn.execute("PRAGMA journal_mode=WAL;")  # optional: prevent locking

    # Insert using executemany and preserve existing table.
    # `with conn` commits on success and rolls back if the insert raises.
    # NOTE(review): OR IGNORE only skips rows that violate a constraint --
    # presumably a UNIQUE on (vehicle_id, refuel_date); the table schema is
    # not visible here, so confirm it exists or re-runs will duplicate rows.
    with conn:
        conn.executemany("""
            INSERT OR IGNORE INTO daily_fuel_entries
            (vehicle_id, refuel_date, fuel_amount, created_on, updated_on)
            VALUES (?, ?, ?, ?, ?)
        """, rows_to_insert)
finally:
    # Always release the database handle, even when the insert fails.
    conn.close()

print("✅ Daily fuel entries inserted successfully")
print(final_df_nodup.head(10))

✅ Daily fuel entries inserted successfully
   vehicle_id refuel_date fuel_amount           created_on  \
0           2  2013-07-01      118.42  2013-07-01 00:00:00   
1           2  2013-07-02      118.42  2013-07-02 00:00:00   
2           2  2013-07-03      118.42  2013-07-03 00:00:00   
3           2  2013-07-04      118.42  2013-07-04 00:00:00   
4           2  2013-07-05      118.42  2013-07-05 00:00:00   
5           2  2013-07-06      118.42  2013-07-06 00:00:00   
6           2  2013-07-07      118.42  2013-07-07 00:00:00   
7           2  2013-07-08      118.42  2013-07-08 00:00:00   
8           2  2013-07-09      118.42  2013-07-09 00:00:00   
9           2  2013-07-10      118.42  2013-07-10 00:00:00   

            updated_on  
0  2025-10-01 12:34:00  
1  2025-10-01 12:34:00  
2  2025-10-01 12:34:00  
3  2025-10-01 12:34:00  
4  2025-10-01 12:34:00  
5  2025-10-01 12:34:00  
6  2025-10-01 12:34:00  
7  2025-10-01 12:34:00  
8  2025-10-01 12:34:00  
9  2025-10-01 12:34:00  