In [1]:
import os
import pandas as pd
import pickle

In [2]:
def read_and_concatenate_csvs(file_pattern):
    """Load every matching CSV in the current working directory into one DataFrame.

    A file matches when its name starts with ``file_pattern``; this is a plain
    ``str.startswith`` prefix test, not a glob or a regular expression.

    Args:
        file_pattern: Filename prefix used to select the input files.

    Returns:
        A DataFrame holding the rows of all matching files, stacked in sorted
        filename order and re-indexed from 0.

    Raises:
        ValueError: If no regular file in the working directory matches
            ``file_pattern``.
    """
    # os.listdir() returns names in arbitrary order, so sort to make the row
    # order of the result reproducible. The sort is lexicographic, meaning
    # 'batch_10' comes before 'batch_2'. Directories that happen to share the
    # prefix are skipped because pd.read_csv cannot read them.
    files = sorted(
        name
        for name in os.listdir()
        if name.startswith(file_pattern) and os.path.isfile(name)
    )
    if not files:
        # pd.concat would also raise ValueError here, but with a message that
        # does not say which prefix or directory was searched.
        raise ValueError(
            f"No files starting with {file_pattern!r} found in {os.getcwd()!r}"
        )

    dataframes = [pd.read_csv(name) for name in files]
    return pd.concat(dataframes, ignore_index=True)

In [3]:
def main(file_pattern='legislation_data_batch_', min_year=2013):
    """Combine the batch CSVs, then write the full data set and a filtered subset.

    Files are read from, and written to, the current working directory. Four
    files are produced: ``full_data.pickle`` / ``full_data.csv`` (all rows,
    all columns) and ``subset_data.pickle`` / ``subset_data.csv`` (filtered
    rows, four columns).

    Args:
        file_pattern: Filename prefix of the batch files to load.
        min_year: Earliest ``introduced_date`` year (inclusive) kept in the
            subset output.
    """
    df = read_and_concatenate_csvs(file_pattern)

    # Parse 'introduced_date' as datetimes so its year can be compared below
    df['introduced_date'] = pd.to_datetime(df['introduced_date'])

    # Save full DataFrame to pickle and CSV
    df.to_pickle('full_data.pickle')
    df.to_csv('full_data.csv', index=False)

    # Keep only rows introduced in min_year or later; rows whose date is
    # missing (NaT) fail the comparison and are dropped as well
    recent = df[df['introduced_date'].dt.year >= min_year]

    # Select only the required columns
    df_subset = recent[['congress', 'number', 'introduced_date', 'type']]

    # Remove rows with null values in the 'number' column
    df_subset = df_subset.dropna(subset=['number'])

    # Save subset DataFrame to pickle and CSV
    df_subset.to_pickle('subset_data.pickle')
    df_subset.to_csv('subset_data.csv', index=False)

In [4]:
# Run the pipeline when this cell/script is executed directly, but not when
# the code is imported as a module.
if __name__ == '__main__':
    main()