In [20]:
import os
import pandas as pd

# Function to read data from catalog_id file
def read_catalog_file(catalog_file):
    df = pd.read_csv(catalog_file, header=None)
    # Extract values from 2nd to 5th row
    values = df.iloc[1:5, 0].tolist()
    return values

# Function to update corresponding table_id file
def update_table_file(table_file, values):
    df = pd.read_csv(table_file)
    # Delete the last row
    df.drop(df.tail(1).index, inplace=True)
    # Add values as new columns in the corresponding table_id file
    df['Seasonally Adjusted'] = values[0]
    df['Area'] = values[1]
    df['State_or_MSA'] = values[2]
    df['State'] = values[3]
    df.to_csv(table_file, index=False)
    
# Path to the folder containing files
folder_path = './'

# Iterate through each folder in the directory
for foldername in os.listdir(folder_path):
    folder_fullpath = os.path.join(folder_path, foldername)
    if os.path.isdir(folder_fullpath):
        # Iterate through each file in the folder
        for filename in os.listdir(folder_fullpath):
            if filename.startswith('catalog'):
                print(filename)
                # Extract id from filename
                catalog_id = filename[7:-4]
                print(catalog_id)
                # Read values from catalog_id file
                values = read_catalog_file(os.path.join(folder_fullpath, filename))
                print(values)
                # Find corresponding table_id file
                table_filename = f'table{catalog_id}.csv'
                print(table_filename)
                table_file_path = os.path.join(folder_fullpath, table_filename)
                if os.path.exists(table_file_path):
                    # Update corresponding table_id file
                    update_table_file(table_file_path, values)
                    print('yes')
                    # Delete the catalog file
                    os.remove(os.path.join(folder_fullpath, filename))
                    print('Catalog file deleted.')


catalog5.csv
5
['Not Seasonally Adjusted', 'West Virginia', 'Statewide', 'West Virginia']
table5.csv
yes
Catalog file deleted.
catalog6.csv
6
['Seasonally Adjusted', 'West Virginia', 'Statewide', 'West Virginia']
table6.csv
yes
Catalog file deleted.
catalog7.csv
7
['Not Seasonally Adjusted', 'Wheeling, WV-OH Metropolitan Statistical Area', 'Metropolitan areas', 'West Virginia']
table7.csv
yes
Catalog file deleted.
catalog0.csv
0
['Not Seasonally Adjusted', 'Appleton, WI Metropolitan Statistical Area', 'Metropolitan areas', 'Wisconsin']
table0.csv
yes
Catalog file deleted.
catalog1.csv
1
['Not Seasonally Adjusted', 'Eau Claire, WI Metropolitan Statistical Area', 'Metropolitan areas', 'Wisconsin']
table1.csv
yes
Catalog file deleted.
catalog10.csv
10
['Not Seasonally Adjusted', 'Sheboygan, WI Metropolitan Statistical Area', 'Metropolitan areas', 'Wisconsin']
table10.csv
yes
Catalog file deleted.
catalog11.csv
11
['Not Seasonally Adjusted', 'Wausau, WI Metropolitan Statistical Area', 'Met

In [34]:
# Add civilian non instituional column to statewide seasonally adjusted files

# Function to find the CSV files with the specified conditions

def find_matching_csv(folder_path):
    matching_files = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            file_path = os.path.join(folder_path, filename)
            print(file_path)
            df = pd.read_csv(file_path)
            if 'Seasonally Adjusted' in df.columns and 'State_or_MSA' in df.columns:
                if (df['Seasonally Adjusted'] == 'Not Seasonally Adjusted').all() and (df['State_or_MSA'] == 'Statewide').all():
                    matching_files.append(file_path)
                    print(matching_files)
    return matching_files

# Function to add the missing column in the second file
def add_missing_column(file1_path, file2_path):
    df1 = pd.read_csv(file1_path)
    df2 = pd.read_csv(file2_path)

    missing_column = set(df1.columns) - set(df2.columns)
    if missing_column:
        missing_column = missing_column.pop()
        df2.insert(df1.columns.get_loc(missing_column), missing_column, '')
        df2.to_csv(file2_path, index=False)

# Specify your folder path containing the CSV files
folder_path = './+54'

# Find matching CSV files
matching_csv_files = find_matching_csv(folder_path)

if len(matching_csv_files) >= 2:
    # Add missing column in the second file
    add_missing_column(matching_csv_files[0], matching_csv_files[1])
    print("Extra column added successfully in the second file.")
else:
    print("Not enough matching CSV files found.")


./+54\concatenated_('Year', 'Period', 'labor force', 'employment', 'unemployment', 'unemployment rate').csv
./+54\table0.csv
./+54\table1.csv
./+54\table2.csv
./+54\table3.csv
./+54\table4.csv
./+54\table5.csv
['./+54\\table5.csv']
./+54\table6.csv
./+54\table7.csv
Not enough matching CSV files found.


In [21]:
df1=pd.read_csv('./+54/table5.csv')
df2=pd.read_csv('./+54/table6.csv')

In [22]:
df1

Unnamed: 0,Year,Period,civilian noninstitutional population,labor force participation rate,employment-population ratio,labor force,employment,unemployment,unemployment rate,Seasonally Adjusted,Area,State_or_MSA,State
0,2014,Jan,1486189,53.1,49.2,789413,730750,58663,7.4,Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
1,2014,Feb,1485849,53.6,49.3,795736,732656,63080,7.9,Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
2,2014,Mar,1485541,53.3,49.4,791982,733327,58655,7.4,Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
3,2014,Apr,1485268,53.5,50.2,795306,745440,49866,6.3,Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
4,2014,May,1485131,53.9,50.5,800834,750399,50435,6.3,Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,2023,Sep,1431175(R),55.4(R),53.4(R),793407(R),763895(R),29512(R),3.7(R),Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
117,2023,Oct,1431399(R),55.4(R),53.4(R),793052(R),763662(R),29390(R),3.7(R),Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
118,2023,Nov,1431405(R),55.0(R),53.0(R),787082(R),758291(R),28791(R),3.7(R),Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
119,2023,Dec,1431411(R),54.7(R),52.5(R),782306(R),751507(R),30799(R),3.9(R),Not Seasonally Adjusted,West Virginia,Statewide,West Virginia


In [23]:
df2

Unnamed: 0,Year,Period,labor force participation rate,employment-population ratio,labor force,employment,unemployment,unemployment rate,Seasonally Adjusted,Area,State_or_MSA,State
0,2014,Jan,53.7,50.0,797557,743740,53817,6.7,Seasonally Adjusted,West Virginia,Statewide,West Virginia
1,2014,Feb,53.7,50.1,797946,744294,53652,6.7,Seasonally Adjusted,West Virginia,Statewide,West Virginia
2,2014,Mar,53.7,50.1,797830,744590,53240,6.7,Seasonally Adjusted,West Virginia,Statewide,West Virginia
3,2014,Apr,53.7,50.1,797081,744377,52704,6.6,Seasonally Adjusted,West Virginia,Statewide,West Virginia
4,2014,May,53.6,50.1,795967,743871,52096,6.5,Seasonally Adjusted,West Virginia,Statewide,West Virginia
...,...,...,...,...,...,...,...,...,...,...,...,...
116,2023,Sep,55.2(R),52.8(R),789914(R),756235(R),33679(R),4.3(R),Seasonally Adjusted,West Virginia,Statewide,West Virginia
117,2023,Oct,55.2(R),52.8(R),790637(R),756448(R),34189(R),4.3(R),Seasonally Adjusted,West Virginia,Statewide,West Virginia
118,2023,Nov,55.2(R),52.8(R),790471(R),756392(R),34079(R),4.3(R),Seasonally Adjusted,West Virginia,Statewide,West Virginia
119,2023,Dec,55.2(R),52.8(R),790224(R),756321(R),33903(R),4.3(R),Seasonally Adjusted,West Virginia,Statewide,West Virginia


In [24]:
# Identify missing columns in df2
missing_columns = [col for col in df1.columns if col not in df2.columns]
# Add missing columns to df2 at the same location
for col in missing_columns:
    index = df1.columns.get_loc(col)
    df2.insert(index, col, df1[col])

In [25]:
df1

Unnamed: 0,Year,Period,civilian noninstitutional population,labor force participation rate,employment-population ratio,labor force,employment,unemployment,unemployment rate,Seasonally Adjusted,Area,State_or_MSA,State
0,2014,Jan,1486189,53.1,49.2,789413,730750,58663,7.4,Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
1,2014,Feb,1485849,53.6,49.3,795736,732656,63080,7.9,Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
2,2014,Mar,1485541,53.3,49.4,791982,733327,58655,7.4,Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
3,2014,Apr,1485268,53.5,50.2,795306,745440,49866,6.3,Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
4,2014,May,1485131,53.9,50.5,800834,750399,50435,6.3,Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,2023,Sep,1431175(R),55.4(R),53.4(R),793407(R),763895(R),29512(R),3.7(R),Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
117,2023,Oct,1431399(R),55.4(R),53.4(R),793052(R),763662(R),29390(R),3.7(R),Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
118,2023,Nov,1431405(R),55.0(R),53.0(R),787082(R),758291(R),28791(R),3.7(R),Not Seasonally Adjusted,West Virginia,Statewide,West Virginia
119,2023,Dec,1431411(R),54.7(R),52.5(R),782306(R),751507(R),30799(R),3.9(R),Not Seasonally Adjusted,West Virginia,Statewide,West Virginia


In [26]:
df2

Unnamed: 0,Year,Period,civilian noninstitutional population,labor force participation rate,employment-population ratio,labor force,employment,unemployment,unemployment rate,Seasonally Adjusted,Area,State_or_MSA,State
0,2014,Jan,1486189,53.7,50.0,797557,743740,53817,6.7,Seasonally Adjusted,West Virginia,Statewide,West Virginia
1,2014,Feb,1485849,53.7,50.1,797946,744294,53652,6.7,Seasonally Adjusted,West Virginia,Statewide,West Virginia
2,2014,Mar,1485541,53.7,50.1,797830,744590,53240,6.7,Seasonally Adjusted,West Virginia,Statewide,West Virginia
3,2014,Apr,1485268,53.7,50.1,797081,744377,52704,6.6,Seasonally Adjusted,West Virginia,Statewide,West Virginia
4,2014,May,1485131,53.6,50.1,795967,743871,52096,6.5,Seasonally Adjusted,West Virginia,Statewide,West Virginia
...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,2023,Sep,1431175(R),55.2(R),52.8(R),789914(R),756235(R),33679(R),4.3(R),Seasonally Adjusted,West Virginia,Statewide,West Virginia
117,2023,Oct,1431399(R),55.2(R),52.8(R),790637(R),756448(R),34189(R),4.3(R),Seasonally Adjusted,West Virginia,Statewide,West Virginia
118,2023,Nov,1431405(R),55.2(R),52.8(R),790471(R),756392(R),34079(R),4.3(R),Seasonally Adjusted,West Virginia,Statewide,West Virginia
119,2023,Dec,1431411(R),55.2(R),52.8(R),790224(R),756321(R),33903(R),4.3(R),Seasonally Adjusted,West Virginia,Statewide,West Virginia


In [18]:
import pandas as pd
import os

# Define the directory containing CSV files
directory = './+54'

# Initialize a dictionary to hold DataFrame groups based on column structure
dataframes_by_columns = {}

# Loop through all CSV files in the directory
for filename in os.listdir(directory):
    if filename.endswith('.csv'):
        # Read the CSV file
        filepath = os.path.join(directory, filename)
        df = pd.read_csv(filepath)
        
        # Get the columns of the DataFrame as a tuple (to make it hashable)
        columns = tuple(df.columns)
        
        # Add the DataFrame to the corresponding group based on its column structure
        if columns not in dataframes_by_columns:
            dataframes_by_columns[columns] = [(filename, df)]  # Include filename
        else:
            dataframes_by_columns[columns].append((filename, df))  # Include filename

# Concatenate DataFrames with the same column structure
for columns, file_dfs in dataframes_by_columns.items():
    if len(file_dfs) > 1:
        print(f"Concatenating files: {[file_df[0] for file_df in file_dfs]}")  # Print filenames
        dfs = [file_df[1] for file_df in file_dfs]
        concatenated_df = pd.concat(dfs, ignore_index=True)
        # Define the output file path in the same directory
        output_filepath = os.path.join(directory, f'concatenated_{columns}.csv')
        # Output concatenated DataFrame to the same directory
        concatenated_df.to_csv(output_filepath, index=False)


Concatenating files: ['table0.csv', 'table1.csv', 'table2.csv', 'table3.csv', 'table4.csv', 'table7.csv']


Unnamed: 0,Year,Period,labor force,employment,unemployment,unemployment rate,Seasonally Adjusted,Area,State_or_MSA,State
0,2014,Jan,47047,42853,4194,8.9,Not Seasonally Adjusted,"Anniston-Oxford-Jacksonville, AL Metropolitan ...",Metropolitan areas,Alabama
1,2014,Feb,47725,43484,4241,8.9,Not Seasonally Adjusted,"Anniston-Oxford-Jacksonville, AL Metropolitan ...",Metropolitan areas,Alabama
2,2014,Mar,47793,43625,4168,8.7,Not Seasonally Adjusted,"Anniston-Oxford-Jacksonville, AL Metropolitan ...",Metropolitan areas,Alabama
3,2014,Apr,47424,43872,3552,7.5,Not Seasonally Adjusted,"Anniston-Oxford-Jacksonville, AL Metropolitan ...",Metropolitan areas,Alabama
4,2014,May,47496,43747,3749,7.9,Not Seasonally Adjusted,"Anniston-Oxford-Jacksonville, AL Metropolitan ...",Metropolitan areas,Alabama
...,...,...,...,...,...,...,...,...,...,...
941,2023,Jul,39193,38165,1028,2.6,Not Seasonally Adjusted,"Gadsden, AL Metropolitan Statistical Area",Metropolitan areas,Alabama
942,2023,Aug,39183,38114,1069,2.7,Not Seasonally Adjusted,"Gadsden, AL Metropolitan Statistical Area",Metropolitan areas,Alabama
943,2023,Sep,39303,38215,1088,2.8,Not Seasonally Adjusted,"Gadsden, AL Metropolitan Statistical Area",Metropolitan areas,Alabama
944,2023,Oct,39574,38508,1066,2.7,Not Seasonally Adjusted,"Gadsden, AL Metropolitan Statistical Area",Metropolitan areas,Alabama
