In [10]:
import os.path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Provide paths to the respective folders
script_folder = r'D:\UoK\OneDrive - University of Kentucky\github\Transit_ridership\transit_ridership_decline\Factors and Ridership Data\code'
load_data = r'D:\UoK\OneDrive - University of Kentucky\github\Transit_ridership\transit_ridership_decline\Factors and Ridership Data\Estimation_File'
output_folder = r'D:\UoK\OneDrive - University of Kentucky\github\Transit_ridership\transit_ridership_decline\Factors and Ridership Data\Script Outputs'
folder_path = ''
file_name = ''
sFoldername = ""

In [11]:
def create_clusterwise_files():    
    # read the basic estimation file    
    os.chdir(load_data) 
    df = pd.read_csv('estimation_file2.csv')
    
    # create cluster-wise files
    df_cluster_gt = df.groupby(['CLUSTER_GT_NEW_11','Mode','Year']).sum().reset_index()
    df_cluster_gt_grouped = df.groupby(['CLUSTER_GT_8_GROUPS','Mode','Year']).sum().reset_index()
    df_cluster_apta = df.groupby(['CLUSTER_APTA','Mode','Year']).sum().reset_index()
    # df_cluster_apta_grouped = df.groupby(['CLUSTER_APTA_GROUPED','Mode','Year']).sum().reset_index()
    
    os.chdir(os.path.join(output_folder, "Clusterwise_data_charts"))

    df_cluster_gt.to_csv('CLUSTER_GT_NEW_11.csv')
    df_cluster_gt_grouped.to_csv('CLUSTER_GT_8_GROUPS.csv')
    df_cluster_apta.to_csv('CLUSTER_APTA.csv')
    # df_cluster_apta_grouped.to_csv('CLUSTER_APTA_GROUPED.csv')

In [12]:
def generate_charts(_dataset, _filename, _chartname, _foldername):
    df = _dataset
    file_name = _filename
    clustercolumn = file_name
    chartname = _chartname
    foldername = _foldername
    
    sFoldername = output_folder + "\\Clusterwise_data_charts\\Charts\\" + foldername
    
    yrs = df['Year'].unique()
    yrs.sort()
    clusters = df[chartname].unique()
    clusters.sort()
    modes = df['Mode'].unique()
    modes.sort()
    
    for cluster in clusters:
        df_fltr = df[df[clustercolumn] == cluster]
        # Print the cluster 
        col_index = df_fltr.columns.get_loc(clustercolumn)
        cluster_code = str(df_fltr.iloc[0, col_index])
        print('Cluster Code:' + str(cluster_code))
        df_fltr['Year'] = pd.to_datetime(df_fltr['Year'].astype(str), format='%Y')
        df_fltr_mod = df_fltr.set_index(pd.DatetimeIndex(df_fltr['Year']).year)        
        # get number of sub-plots defined - 4*2 means 4 rows having 2 graphs (each sized 18x9) in each row = 8 graphs
        fig, ax = plt.subplots(nrows=4, ncols=2, figsize=(18, 9))        
        x = 1
        for mode in modes:
            df_fltr_mode = df_fltr_mod[df_fltr_mod.Mode == mode]

            # Year vs Total_Population --> Graph (0,0)
            df_fltr_mode.groupby('Mode').plot(x='Year', y='Tot_Pop', label=str(mode), ax=ax[0][0], legend=True)
            ax[0][0].set(xlabel="Years", ylabel='Tot Pop')
            ax[0][0].legend(loc='best')            
            # ax[0][0].set_autoscaley_on(True)
            # ax[0][0].grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
            # try:                                
            #     ax[0][0].grid(False)
            #     ax[0][0].margins(0.20)
            #     ax[0][0].set_ylim([0,df_fltr_mode['Tot_Pop'].max()])
            # except ValueError:
            #     pass

            # Year vs POP_EMP_log_FAC_scaled --> Graph (1,0)
            df_fltr_mode.groupby('Mode').plot(x='Year', y='PCT_HH_NO_VEH', label=str(mode), ax=ax[1][0], legend=True)
            ax[1][0].set(xlabel="Years", ylabel='PCT of HH with no veh.')
            ax[1][0].legend(loc='best')
            # ax[1][0].set_autoscaley_on(True)
            # ax[1][0].grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
            # try:                                
            #     ax[1][0].grid(True)
            #     ax[1][0].margins(0.20)
            #     ax[1][0].set_ylim([0,df_fltr_mode['PCT_HH_NO_VEH'].max()])
            # except ValueError:
            #     pass

            # Year vs Income levels --> Graph (2,0)
            df_fltr_mode.groupby('Mode').plot(x='Year', y='INC_U35', label=("INC_U35 - "+str(mode)), ax=ax[2][0], legend=True)
            df_fltr_mode.groupby('Mode').plot(x='Year', y='INC_35_100', label=("INC_35_100 - "+str(mode)), ax=ax[2][0], legend=True)
            df_fltr_mode.groupby('Mode').plot(x='Year', y='INC_100P', label=("INC_100P - "+str(mode)), ax=ax[2][0], legend=True)
            ax[2][0].set(xlabel="Years", ylabel='INC_Levels($)')
            ax[2][0].legend(loc='best')
            # ax[2][0].set_autoscaley_on(True)
            # ax[2][0].grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
            # ax[2][0].set_autoscaley_on(False)
            # try:                                
            #     ax[2][0].grid(True)
            #     ax[2][0].margins(0.20)
            #     ax[2][0].set_ylim([0,max(df_fltr_mode[['INC_U35', 'INC_35_100','INC_100P']].values.max(1))])
            # except ValueError:
            #     pass

            # Year vs Gas price (S) --> Graph (3,0)
            df_fltr_mode.groupby('Mode').plot(x='Year', y='GAS_PRICE_2018', label=str(mode), ax=ax[3][0], legend=True)
            ax[3][0].set(xlabel="Years", ylabel='Gas price ($)')
            ax[3][0].legend(loc='best')
            # ax[3][0].set_autoscaley_on(True)
            # ax[3][0].grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
            # ax[3][0].set_autoscaley_on(False)
            # try:                                
            #     ax[3][0].grid(True)
            #     ax[3][0].margins(0.20)
            #     ax[3][0].set_ylim([0,df_fltr_mode['GAS_PRICE_2018'].max()])
            # except ValueError:
            #     pass

            
            label_upt_adj = str(mode)+"_UPT_ADJ"
            label_avg_spd = str(mode)+"_AVG_SPEED"
            label_fare = str(mode)+"_FARE_per_UPT_2018"
            label_VRMs = str(mode)+"_VRM_ADJ"

            # Year vs UPT_ADJ --> Graph (0,1)
            df_fltr_mode.groupby('Mode').plot(x='Year', y=label_upt_adj, label=str(mode), ax=ax[0][1], legend=True)
            ax[0][1].set(xlabel="Years", ylabel='Ridership')
            ax[0][1].legend(loc='best')
            # ax[0][1].set_autoscaley_on(True)
            # ax[0][1].grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
            # ax[0][1].set_autoscaley_on(False)
            # try:                                
            #     ax[0][1].grid(True)
            #     ax[0][1].margins(0.20)
            #     ax[0][1].set_ylim([0,df_fltr_mode[label_upt_adj].max()])
            # except ValueError:
            #     pass

             # Year vs FARE_per_UPT --> Graph (1,1)
            df_fltr_mode.groupby('Mode').plot(x='Year', y=label_fare, label=str(mode), ax=ax[1][1], legend=True)
            ax[1][1].set(xlabel="Years", ylabel='Fare per UPT (S)')
            ax[1][1].legend(loc='best')
            # ax[1][1].set_autoscaley_on(True)
            # ax[1][1].grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
            # ax[1][1].set_autoscaley_on(False)
            # try:                                
            #     ax[1][1].grid(True)
            #     ax[1][1].margins(0.20)
            #     ax[1][1].set_ylim([0,df_fltr_mode[label_fare].max()])
            # except ValueError:
            #     pass

            # Year vs Avg speed --> Graph (2,1)
            df_fltr_mode.groupby('Mode').plot(x='Year', y=label_avg_spd, label=str(mode), ax=ax[2][1], legend=True)
            ax[2][1].set(xlabel="Years", ylabel='Avg speed')
            ax[2][1].legend(loc='best')
            # ax[2][1].set_autoscaley_on(True)
            # ax[2][1].grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
            # ax[2][1].set_autoscaley_on(False)
            # try:                                
            #     ax[2][1].grid(True)
            #     ax[2][1].margins(0.20)
            #     ax[2][1].set_ylim([0,df_fltr_mode[label_avg_spd].max()])
            # except ValueError:
            #     pass

            # Year vs VRM_ADJ_log_FAC_scaled --> Graph (1,3)
            df_fltr_mode.groupby('Mode').plot(x='Year', y=label_VRMs, label=str(mode), ax=ax[3][1], legend=True)
            ax[3][1].set(xlabel="Years", ylabel='VRMs')
            ax[3][1].legend(loc='best')
            # ax[3][1].set_autoscaley_on(True)
            # ax[3][1].grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
            # ax[3][1].set_autoscaley_on(False)
            # try:                                
            #     ax[3][1].grid(True)
            #     ax[3][1].margins(0.20)
            #     ax[3][1].set_ylim([0,df_fltr_mode[label_VRMs].max()])
            # except ValueError:
            #     pass

        fig.suptitle(('Cluster Code:' + str(cluster_code)),fontsize=14)
        fig.tight_layout()
        _figno = x

        # code to let these file save in the specific folder
        os.chdir(sFoldername)
        # add folder name
    #         save_folder = output_folder +'\\' + folder_path
    #     os.path.join(sFoldername, folder_path)
    #     if not os.path.exists(sFolder):
    #         os.mkdir(folder_path)
    #         print (folder_path + ": sucessfully created")
    #     else:
    #         print (folder_path + ": already exists")
    # 
    #     mod = sFoldername + "\\" + str(folder_path)
    #     os.chdir(mod)
        fig.savefig((str(_figno) + "-" + cluster_code + ".png"))
        plt.suptitle(cluster_code,fontsize=14)
        plt.close(fig)
        x += 1

In [13]:
def main():
    create_clusterwise_files()
    clusters = ['CLUSTER_GT_NEW_11', 'CLUSTER_GT_8_GROUPS','CLUSTER_APTA']
    for cluster in clusters:
        folder_path = str(cluster)
        sFoldername = output_folder + "\Clusterwise_data_charts\Charts"
        os.chdir(sFoldername)
        if not os.path.exists(os.path.join(sFoldername,folder_path)):                        
            os.mkdir(cluster)
            print ("Folder for " + cluster+" : successfully created")            
        else:
            print ("Folder for " + cluster+" : already exists")
            
        os.chdir(os.path.join(output_folder, "Clusterwise_data_charts"))
        df = pd.read_csv(str(cluster)+".csv")
        sFoldername = output_folder + "\Clusterwise_data_charts\Charts" + "\\" + folder_path
        os.chdir(sFoldername)
        generate_charts(df, str(cluster), str(cluster), str(cluster))
        print("Loop for " + cluster + " completed")
    

if __name__ == "__main__":
    main()

Folder for CLUSTER_GT_NEW_11 : already exists
Cluster Code:1
Cluster Code:2
Cluster Code:3
Cluster Code:4
Cluster Code:5
Cluster Code:6
Cluster Code:7
Cluster Code:8
Cluster Code:9
Cluster Code:10
Cluster Code:11
Cluster Code:100
Loop for CLUSTER_GT_NEW_11 completed
Folder for CLUSTER_GT_8_GROUPS : already exists
Cluster Code:100
Cluster Code:A
Cluster Code:B
Cluster Code:C
Cluster Code:D
Cluster Code:E
Cluster Code:F
Cluster Code:G
Cluster Code:H
Loop for CLUSTER_GT_8_GROUPS completed
Folder for CLUSTER_APTA : already exists
Cluster Code:10
Cluster Code:21
Cluster Code:22
Cluster Code:23
Cluster Code:24
Cluster Code:31
Cluster Code:32
Cluster Code:33
Cluster Code:34
Cluster Code:100
Loop for CLUSTER_APTA completed


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
