In [2]:
import pandas as pd
import datetime
import numpy as np

In [3]:
def process_excel_data(filename, nrows=None):
  """Reshape one exported workbook into a long table with one row per quarter.

  The sheet is read with its first four rows skipped, so the fifth row becomes
  the header. The remaining rows are consumed in groups of nine:

  * the first row of a group holds the property address in column
    'Unnamed: 0';
  * the next seven rows hold a small wide table: a row of quarter labels
    followed by six metric rows, assumed to be (in order) asking rent,
    effective rent, effective RPSF, occupancy, concession ($) and
    concession (%) -- the code renames by position and does not check the
    row labels;
  * the last row of a group is not used.

  Each wide table is transposed so that every quarter becomes a row, and the
  property's address is attached to each of those rows.

  Args:
    filename: Workbook to read; passed straight to `pd.read_excel`.
    nrows: Optional cap on the number of rows read, forwarded to
      `pd.read_excel`.

  Returns:
    A DataFrame with the columns 'Quarters', 'Asking Rent', 'Effective Rent',
    'Effective RPSF', 'Occupancy', 'Concession ($)', 'Concession (%)' and
    'Address'. The row index is not reset, so it restarts at 1 for every
    property. An empty DataFrame is returned when the sheet contains no
    address row that is followed by at least one more row.
  """
  # Layout of the export: a new property starts every 9 rows, and the wide
  # table under each address spans 7 rows (quarter labels + 6 metrics).
  rows_per_property = 9
  rows_per_table = 7
  # After transposing, column 0 carries the quarter labels and columns 1-6
  # carry the metric rows in their original order.
  column_names = {
      0: "Quarters",
      1: "Asking Rent",
      2: "Effective Rent",
      3: "Effective RPSF",
      4: "Occupancy",
      5: "Concession ($)",
      6: "Concession (%)"
  }

  df = pd.read_excel(filename, skiprows=4, nrows=nrows)

  per_property = []
  # A property only counts when the row after its address row exists, hence
  # the len(df) - 1 bound.
  for address_pos in range(0, len(df) - 1, rows_per_property):
    unformatted_address = df.iloc[address_pos]['Unnamed: 0']
    table_pos = address_pos + 1

    # Promote the quarter-label row to the header of the wide table.
    wide = df.iloc[table_pos:table_pos + rows_per_table].reset_index(drop=True)
    wide.columns = wide.iloc[0]
    wide = wide.drop(wide.index[0])

    # One row per quarter; the first transposed row only repeats the metric
    # labels, so it is dropped.
    long = wide.T.reset_index().iloc[1:]
    long = long.rename(columns=column_names)
    long['Address'] = unformatted_address
    per_property.append(long)

  # pd.concat rejects an empty list, so keep the "nothing found" result explicit.
  if not per_property:
    return pd.DataFrame()
  # Concatenate once instead of growing a frame inside the loop.
  return pd.concat(per_property, axis=0)

In [4]:
def process_address_and_time(df, pivot_year=None):
  """Split the 'Address' and 'Quarters' columns into separate fields.

  Adds the columns 'Index', 'Street Address', 'City', 'State' and 'Zip' (the
  ', '-separated parts of 'Address', in that order), plus 'Year' and 'Quarter'
  taken from 'Quarters' labels shaped like '1Q95'. All added columns hold
  strings; 'Year' is the four-digit year and 'Quarter' the leading digit.

  Args:
    df: DataFrame with string columns 'Address' and 'Quarters'. It is
      modified in place.
    pivot_year: Boundary used to expand two-digit years: values greater than
      this become 19xx, all others 20xx. Defaults to the last two digits of
      the current calendar year, so labels for years up to the present are
      read as 20xx.

  Returns:
    The same DataFrame object that was passed in, with the new columns added.
  """
  if pivot_year is None:
    pivot_year = datetime.date.today().year % 100

  address_fields = ['Index', 'Street Address', 'City', 'State', 'Zip']

  # Nothing to split: still add the derived columns so callers get a
  # consistent schema instead of an error on a frame with no rows.
  if df.empty:
    for column in address_fields + ['Year', 'Quarter']:
      df[column] = pd.Series(dtype=object)
    return df

  # NOTE(review): this assignment expects the split to produce exactly five
  # parts; an address with extra or missing ', ' separators makes pandas
  # reject it -- confirm the export never contains such addresses.
  df[address_fields] = df['Address'].str.split(', ', expand=True)

  two_digit_year = df['Quarters'].str[2:]  # the two digits after 'Q'
  df['Year'] = np.where(
      two_digit_year.astype(int) > pivot_year,
      '19' + two_digit_year,
      '20' + two_digit_year,
  )
  df['Quarter'] = df['Quarters'].str[:1]
  return df

In [5]:
# Run for all files in a folder
# 13 files took 8s

import os

# Change the folder_path as needed
folder_path = "/content"

# Collect one formatted frame per workbook and concatenate once at the end.
# Names are sorted so the row order does not depend on the arbitrary order in
# which os.listdir returns entries.
file_dfs = []
for file_name in sorted(os.listdir(folder_path)):
  if file_name.endswith(".xlsx"):
    # os.listdir yields bare names; join with the folder so the read does not
    # depend on the current working directory.
    sub_df = process_excel_data(os.path.join(folder_path, file_name))
    sub_df = process_address_and_time(sub_df)
    file_dfs.append(sub_df)

# pd.concat rejects an empty list, so fall back to an empty frame when the
# folder holds no .xlsx files.
all_dfs = pd.concat(file_dfs, axis=0) if file_dfs else pd.DataFrame()

all_dfs



Unnamed: 0,Quarters,Asking Rent,Effective Rent,Effective RPSF,Occupancy,Concession ($),Concession (%),Address,Index,Street Address,City,State,Zip,Year,Quarter
1,1Q95,,,,,,,"[1] 10X Weston, 1343 St Tropez Cir, Weston, FL...",[1] 10X Weston,1343 St Tropez Cir,Weston,FL,33326,1995,1
2,2Q95,,,,,,,"[1] 10X Weston, 1343 St Tropez Cir, Weston, FL...",[1] 10X Weston,1343 St Tropez Cir,Weston,FL,33326,1995,2
3,3Q95,,,,,,,"[1] 10X Weston, 1343 St Tropez Cir, Weston, FL...",[1] 10X Weston,1343 St Tropez Cir,Weston,FL,33326,1995,3
4,4Q95,,,,,,,"[1] 10X Weston, 1343 St Tropez Cir, Weston, FL...",[1] 10X Weston,1343 St Tropez Cir,Weston,FL,33326,1995,4
5,1Q96,,,,,,,"[1] 10X Weston, 1343 St Tropez Cir, Weston, FL...",[1] 10X Weston,1343 St Tropez Cir,Weston,FL,33326,1996,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108,4Q21,2255,2255,2.102,0.9875,0,0,"[33] Windsor Coconut Creek, 5400 Lyons Rd, Coc...",[33] Windsor Coconut Creek,5400 Lyons Rd,Coconut Creek,FL,33073,2021,4
109,1Q22,2203,2203,2.053,0.9964,0,0,"[33] Windsor Coconut Creek, 5400 Lyons Rd, Coc...",[33] Windsor Coconut Creek,5400 Lyons Rd,Coconut Creek,FL,33073,2022,1
110,2Q22,2428,2428,2.263,0.9484,0,0,"[33] Windsor Coconut Creek, 5400 Lyons Rd, Coc...",[33] Windsor Coconut Creek,5400 Lyons Rd,Coconut Creek,FL,33073,2022,2
111,3Q22,2357,2357,2.197,0.9377,0,0,"[33] Windsor Coconut Creek, 5400 Lyons Rd, Coc...",[33] Windsor Coconut Creek,5400 Lyons Rd,Coconut Creek,FL,33073,2022,3


In [None]:
# Write the combined table to a CSV file in the working directory.
# index=False leaves the row index out of the file.
output_csv = "formatted_financial_data.csv"
all_dfs.to_csv(output_csv, index=False)
