In [17]:
################################################################################################################

import os
import pandas as pd 

################################################################################################################

def walking_directory_tree(directory_path):
    '''
    Walk the tree rooted at ``directory_path`` and collect the names it contains.

    Parameters
    ----------
    directory_path : str or path-like
        Directory whose tree is traversed recursively, top-down. The current
        working directory plays no role in the result.

    Returns
    -------
    tuple of (list of str, list of str)
        ``(dirnames, filenames)``. Both lists hold bare names (no parent path)
        of every subdirectory / file found anywhere below ``directory_path``,
        so the same name can appear several times. Names are sorted within
        each visited directory.

    Notes
    -----
    ``os.walk`` ignores listing errors by default, so a path that does not
    exist or cannot be read yields two empty lists rather than an exception.
    '''
    dirnames_list = []
    filenames_list = []
    for _dirpath, dirnames, files in os.walk(directory_path):
        # Sorting in place also fixes the order in which os.walk descends,
        # making the result independent of the filesystem's listing order.
        dirnames.sort()
        dirnames_list.extend(dirnames)
        filenames_list.extend(sorted(files))
    return (dirnames_list, filenames_list)


################################################################################################################

def concatenate_df(root, filetype, filename):
    r'''
    Combine the per-folder CSV files under ``root`` into one average per date.

    Every immediate subfolder of ``root`` must contain a file called
    ``filename`` with a ``date`` column. Each file is indexed by ``date``, its
    ``mean`` column is renamed to the subfolder's name, the frames are joined
    side by side on the date index, and the row-wise mean of all resulting
    columns is stored as ``average_price``.

    Parameters
    ----------
    root : str
        Folder whose immediate subfolders are read. Files lying directly in
        ``root`` and deeper nesting levels are ignored.
    filetype : str
        When equal to ``"calendar"``, the summary columns ``count``, ``std``,
        ``min``, ``25%``, ``50%``, ``75%`` and ``max`` are dropped and ``date``
        is parsed to datetime before indexing. Other values skip that step.
    filename : str
        Name of the CSV file expected inside every subfolder.

    Returns
    -------
    pandas.DataFrame
        Indexed by date, with the columns ``average_price`` and ``date``
        (the index values converted to datetime).

    Raises
    ------
    ValueError
        If ``root`` has no subfolders.
    FileNotFoundError
        If ``root`` is missing or a subfolder does not contain ``filename``.
    KeyError
        If a file lacks ``date`` or, for ``"calendar"``, a summary column.

    Ex input: (r"E:\Airbnb\Data\amsterdam\calendar\minis", "calendar", "calendar.csv")
    '''
    # Only the immediate subfolders hold input files; sorting keeps the column
    # order stable regardless of how the filesystem lists them.
    with os.scandir(root) as entries:
        folder_names = sorted(entry.name for entry in entries if entry.is_dir())
    if not folder_names:
        raise ValueError("No subfolders found in {0!r}".format(root))

    # We can perhaps extend this function to other file types later.
    summary_columns = ["count", "std", "min", "25%", "50%", "75%", "max"]

    frames = []
    for folder_name in folder_names:
        frame = pd.read_csv(os.path.join(root, folder_name, filename))
        if filetype == "calendar":
            # Keep only the per-date mean and make the dates real datetimes
            # so the frames align on the same index type.
            frame = frame.drop(columns=summary_columns)
            frame["date"] = pd.to_datetime(frame["date"])
        # One column per folder, named after it, so the join keeps them apart.
        frame = frame.set_index("date").rename(columns={"mean": str(folder_name)})
        frames.append(frame)

    combined = pd.concat(frames, axis=1)
    combined["average_price"] = combined.mean(axis=1)
    combined["date"] = pd.to_datetime(combined.index)
    return combined[["average_price", "date"]]

In [18]:
# Display the (dirnames, filenames) tuple collected by the helper.
# NOTE(review): in this view `root` is only assigned in a later cell —
# confirm it is defined before this cell on a fresh kernel run.
walking_directory_tree(root)

(['2017-05-08',
  '2018-04-13',
  '2018-05-15',
  '2018-06-10',
  '2018-07-11',
  '2018-08-15',
  '2018-09-12',
  '2018-10-10',
  '2018-11-09',
  '2018-12-11',
  '2019-01-16',
  '2019-02-07',
  '2019-03-08',
  '2019-04-11',
  '2019-05-14',
  '2019-06-08',
  '2019-07-12',
  '2019-08-12',
  '2019-09-20',
  '2019-10-16',
  '2019-11-16',
  '2019-12-12',
  '2020-01-12',
  '2020-02-19'],
 ['rome_calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv',
  'calendar.csv'])

In [25]:
################################################################################################################

# Folder whose subfolders are read by concatenate_df below.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
root = r"C:\Users\aleen\Desktop\Master Thesis\Data\paris\calendar\minis"
# Names of the entries directly inside root; not used again in this cell.
listdir = os.listdir(root) # returns list
# Switch the working directory to the data root before the helpers run.
os.chdir(root)

# The returned tuple is discarded here (this is not the cell's last expression).
walking_directory_tree(root)

# Build the per-date average and write it into root itself.
calendar = concatenate_df(root, "calendar", "calendar.csv")
calendar.to_csv(root + "\\" + "paris_calendar.csv")