In [10]:
import pyodbc
import os
import dotenv
import pandas as pd
import re
from datetime import date


In [11]:
# --- User input ---
# Reporting period is entered interactively; both values are kept as strings here.
year = input("Enter year (e.g. 2025): ").strip()
quarter = input("Enter quarter (Q1, Q2, Q3, Q4): ").strip().upper()

# --- Basic validation ---
if quarter not in {"Q1", "Q2", "Q3", "Q4"}:
    raise ValueError("Quarter must be one of: Q1, Q2, Q3, Q4")

# str.isdigit() also accepts characters such as "²" that int() rejects, so the
# year is matched against four ASCII digits explicitly.
if not re.fullmatch(r"[0-9]{4}", year):
    raise ValueError("Year must be a 4-digit number")

# --- Build directory path ---
# os.environ.get() returns None when USERNAME is unset, which would make
# os.path.join fail with an opaque TypeError; fail with a clear message instead.
username = os.environ.get("USERNAME")
if not username:
    raise EnvironmentError(
        "Environment variable USERNAME is not set; cannot build the output directory path."
    )

HOME = os.path.join(
    "C:\\Users",
    username,
    "Documents",
    "Equity",
    f"{year}_{quarter}"
)

# --- Create directory if it doesn't exist ---
os.makedirs(HOME, exist_ok=True)

print(f"Directory ready: {HOME}")

Directory ready: C:\Users\MANJANID\Documents\Equity\2025_Q3


In [12]:

# Base path (up to Equity)
# os.environ.get() returns None when USERNAME is unset, which would make
# os.path.join fail with an opaque TypeError; fail with a clear message instead.
username = os.environ.get("USERNAME")
if not username:
    raise EnvironmentError(
        "Environment variable USERNAME is not set; cannot build the output directory path."
    )

BASE_DIR = os.path.join(
    "C:\\Users",
    username,
    "Documents",
    "Equity"
)

# Lowest folder = year_quarter folder
HOME = os.path.join(BASE_DIR, f"{year}_{quarter}")
os.makedirs(HOME, exist_ok=True)  # creates year_quarter folder if missing (silent)

# Then data folder inside it
DATA_DIR = os.path.join(HOME, "data")
os.makedirs(DATA_DIR, exist_ok=True)  # creates data folder if missing (silent)

# Check for parquet/csv files inside DATA_DIR
data_files = [
    f for f in os.listdir(DATA_DIR)
    if f.lower().endswith((".parquet", ".csv"))
]

if data_files:
    update = input(
        f"Data already exists in {DATA_DIR} ({len(data_files)} file(s)). "
        "Do you want to update? (y/n): "
    ).strip().lower()

    if update != "y":
        print("Using existing data. No update performed.")
    else:
        print("Update requested. Proceeding...")
else:
    # No data found (silent): a fresh extract is the only option, so record that
    # explicitly. Without this, `update` would be undefined on a fresh kernel or
    # would keep a stale answer from an earlier run of this cell.
    update = "y"

# NOTE(review): nothing in this cell acts on `update`; confirm that the cells
# which query the database and write files actually check it before relying on
# the "No update performed" message.


Update requested. Proceeding...


In [13]:
# Best-effort cleanup of handles that may be left over from an earlier run.
# Each close is wrapped in a lambda so the name lookup happens inside the try:
# a missing `cursor` / `cnxn` is ignored in the same way as a failing close().
for _close_handle in (
    lambda: cursor.close(),  # cursor first
    lambda: cnxn.close(),    # then the connection
):
    try:
        _close_handle()
    except Exception:
        pass

del _close_handle  # do not leave the loop helper in the notebook namespace

print("Database connection closed.")

Database connection closed.


In [14]:
# Load settings from a .env file (if present) into the process environment.
dotenv.load_dotenv()

# Read the four connection settings up front. os.getenv() returns None for a
# missing variable, and concatenating None into the connection string would
# raise a TypeError that the pyodbc handler below does not catch.
_db_settings = {name: os.getenv(name) for name in ("server", "database", "user", "pswd")}
_missing_settings = [name for name, value in _db_settings.items() if not value]
if _missing_settings:
    raise EnvironmentError(
        "Missing database setting(s) in environment/.env: " + ", ".join(_missing_settings)
    )

try:
    # The connection string is assembled from clean KEY=value parts so that no
    # source-code indentation ends up inside it, and it is built inline so the
    # password is not kept in a separate notebook variable.
    cnxn = pyodbc.connect(
        ";".join([
            "DRIVER={SQL Server}",
            "SERVER=" + _db_settings["server"],
            "DATABASE=" + _db_settings["database"],
            "UID=" + _db_settings["user"],
            "PWD=" + _db_settings["pswd"],
            "Trusted_Connection=no",
        ]) + ";"
    )
except pyodbc.Error as ex:
    print('Connection failed', ex)
    # Re-raise: every later cell needs `cnxn`, so continuing would only defer
    # the failure to a less informative NameError.
    raise
else:
    print('Connected to database')
finally:
    # Drop the helper names (including the password) from the namespace.
    del _db_settings, _missing_settings

Connected to databse


In [15]:
# Recover the reporting period from the last component of HOME,
# which has the form "<year>_<quarter>" (e.g. 2025_Q2).
folder_name = os.path.basename(HOME)
year_text, quarter = folder_name.split("_")
year = int(year_text)

# Last calendar day (month, day) of each quarter.
quarter_end_map = dict(
    Q1=(3, 31),
    Q2=(6, 30),
    Q3=(9, 30),
    Q4=(12, 31),
)

month, day = quarter_end_map[quarter]
quarter_end = date(year, month, day)
quarter_end_date = quarter_end.isoformat()  # ISO string, e.g. "2025-09-30"

print(f"Quarter end date resolved as: {quarter_end_date}")


Quarter end date resolved as: 2025-09-30


In [16]:
sql_file_path = "cashflows.txt"  # must be accessible in your environment

with open(sql_file_path, "r", encoding="utf-8") as f:
    sql_text = f.read()

# The script is expected to contain the literal ODBC date placeholder below;
# every occurrence is replaced with the resolved quarter end date.
date_placeholder = "{d'YYYY-MM-DD'}"
placeholder_count = sql_text.count(date_placeholder)

# Fail loudly when the placeholder is absent. Otherwise the script would be
# sent unchanged while this cell still reported a successful update.
if placeholder_count == 0:
    raise ValueError(
        f"Placeholder {date_placeholder} not found in {sql_file_path}; "
        "the quarter end date was not applied."
    )

sql_text_updated = sql_text.replace(date_placeholder, f"{{d'{quarter_end_date}'}}")

print("SQL placeholder date updated.")

SQL placeholder date updated.


In [17]:
# Sanity check: show every line of the prepared SQL that mentions CloseDate,
# so the substituted date can be inspected by eye.
close_date_lines = [
    sql_line
    for sql_line in sql_text_updated.splitlines()
    if "CloseDate" in sql_line
]

# Guarded so that nothing at all is printed when there is no match.
if close_date_lines:
    print(*close_date_lines, sep="\n")

  , cf.[CloseDate] AS [Transaction Date]
  , DATEADD(Q, DATEDIFF(Q, -1, cf.[CloseDate]), -1) AS [Transaction EoQ]
  , CONVERT(VARCHAR(4), DATEPART(YEAR, cf.[CloseDate])) + '-Q' + CONVERT(VARCHAR(1), DATEPART(QUARTER, cf.[CloseDate])) AS [Transaction Quarter]
  , DATEADD(Q, DATEDIFF(Q, -1, MIN(cf.[CloseDate]) OVER (PARTITION BY cf.[FundID]) ), -1) AS [First Transaction EoQ]
  , DATEADD(Q, DATEDIFF(Q, -1, MAX(cf.[CloseDate]) OVER (PARTITION BY cf.[FundID])), -1) AS [Last Transaction EoQ]
            WHEN cf.transactiontype IN ('Valuation', 'Secondary Sale Proceeds') THEN cf.[CloseDate]
          DATEADD(Q, DATEDIFF(Q, -1, cf.[CloseDate]), -1) -- Transaction EoQ
  AND cf.[CloseDate] < {d'2025-09-30'}


In [18]:

cursor = cnxn.cursor()

# Send the whole prepared script in a single call.
cursor.execute(sql_text_updated)

# cursor.description is None while the current statement produced no result
# set. Step forward with nextset() until one does; this stops at the FIRST
# statement that returns rows. Fail if the script runs out of statements.
while True:
    if cursor.description is not None:
        break
    if not cursor.nextset():
        raise RuntimeError("SQL executed but no final result set was returned. Check the last SELECT in cashflows.txt.")

# The first field of each description entry is the column name.
cols = [name for name, *_ in cursor.description]
rows = cursor.fetchall()
df = pd.DataFrame.from_records(rows, columns=cols)

row_count = len(df)
print("Rows returned: {:,}".format(row_count))


Rows returned: 84,783


In [19]:
def _dedupe_columns(names):
    """Return `names` with repeats made unique by appending ".1", ".2", ...

    The first occurrence of a name is kept unchanged. A suffixed candidate is
    only accepted if it does not collide with any name already emitted, so an
    input such as ["a", "a", "a.1"] yields ["a", "a.1", "a.1.1"] rather than a
    second "a.1".
    """
    used = set()
    counts = {}
    unique = []
    for name in names:
        candidate = name
        while candidate in used:
            counts[name] = counts.get(name, 0) + 1
            candidate = f"{name}.{counts[name]}"
        used.add(candidate)
        unique.append(candidate)
    return unique


# Deduplicate column names (Parquet-safe)
cols = _dedupe_columns(list(df.columns))
df.columns = cols

csv_path = os.path.join(DATA_DIR, "cashflows.csv")
parquet_path = os.path.join(DATA_DIR, "cashflows.parquet")

# Save files
df.to_csv(csv_path, index=False)
df.to_parquet(parquet_path, index=False)

print("Files saved successfully:")
print(csv_path)
print(parquet_path)


Files saved successfully:
C:\Users\MANJANID\Documents\Equity\2025_Q3\data\cashflows.csv
C:\Users\MANJANID\Documents\Equity\2025_Q3\data\cashflows.parquet


In [20]:
# --- Load KMP SQL ---
# Read the whole query script into memory as UTF-8 text.
kmp_sql_path = "kmp.txt"   # adjust path if needed

with open(kmp_sql_path, mode="r", encoding="utf-8") as kmp_file:
    kmp_sql = kmp_file.read()

print("KMP SQL loaded.")

KMP SQL loaded.


In [21]:
# Run the KMP script on a fresh cursor.
cursor = cnxn.cursor()
cursor.execute(kmp_sql)

# Skip statements that produced no result set (description is None) until one
# that did is reached; give up when nextset() reports there are none left.
has_result_set = cursor.description is not None
while not has_result_set:
    if not cursor.nextset():
        raise RuntimeError("KMP SQL executed but no final result set was returned.")
    has_result_set = cursor.description is not None

# Column names are the first field of each description entry.
cols = [column_info[0] for column_info in cursor.description]
rows = cursor.fetchall()

df_kmp = pd.DataFrame.from_records(rows, columns=cols)

kmp_row_count = len(df_kmp)
print(f"KMP rows returned: {kmp_row_count:,}")


KMP rows returned: 9,772


In [22]:
# Persist the KMP extract in DATA_DIR, once as CSV and once as Parquet.
csv_path, parquet_path = (
    os.path.join(DATA_DIR, kmp_file_name)
    for kmp_file_name in ("kmp.csv", "kmp.parquet")
)

df_kmp.to_csv(csv_path, index=False)
df_kmp.to_parquet(parquet_path, index=False)

# Same three output lines as separate print calls would give.
print("KMP files saved:", csv_path, parquet_path, sep="\n")

KMP files saved:
C:\Users\MANJANID\Documents\Equity\2025_Q3\data\kmp.csv
C:\Users\MANJANID\Documents\Equity\2025_Q3\data\kmp.parquet


In [23]:
# --- Load SQL ---
# Read the whole grades query script into memory as UTF-8 text.
grades_sql_path = "grades.txt"  # adjust path if needed

with open(grades_sql_path, mode="r", encoding="utf-8") as grades_file:
    grades_sql = grades_file.read()

print("Grades SQL loaded.")

Grades SQL loaded.


In [24]:
# Run the grades script on a fresh cursor.
cursor = cnxn.cursor()
cursor.execute(grades_sql)

# Move past statements without a result set (description is None); stop at
# the first one that has one, or fail when nextset() finds nothing more.
while True:
    if cursor.description is not None:
        break
    if not cursor.nextset():
        raise RuntimeError("Grades SQL executed but no final result set was returned.")

# Column names are the first field of each description entry.
cols = [name for name, *_ in cursor.description]
rows = cursor.fetchall()

df_grades = pd.DataFrame.from_records(rows, columns=cols)

print("Grades rows returned: {:,}".format(len(df_grades)))

Grades rows returned: 23,492


In [25]:
# Persist the grades extract in DATA_DIR, once as CSV and once as Parquet.
csv_path, parquet_path = (
    os.path.join(DATA_DIR, grades_file_name)
    for grades_file_name in ("grades.csv", "grades.parquet")
)

df_grades.to_csv(csv_path, index=False)
df_grades.to_parquet(parquet_path, index=False)

# Same three output lines as separate print calls would give.
print("Grades files saved:", csv_path, parquet_path, sep="\n")

Grades files saved:
C:\Users\MANJANID\Documents\Equity\2025_Q3\data\grades.csv
C:\Users\MANJANID\Documents\Equity\2025_Q3\data\grades.parquet
