# Import, combine, & export datasets from a specific file path

### Description

- Use to import *multiple* files from a specific file path (iterates over the files in a folder)
- Imports all files in the folder, combines them into a single dataframe, and exports the result
- Useful when all files share an identical template – looping through the folder is more efficient than importing each file by hand
- Works for files in a subfolder of the current working directory (cwd), files in the directory one level above the cwd (parent folder), or files in a subfolder of that parent directory (sister folder)

## Import libraries

In [1]:
import os
import pandas as pd

## Import & combine data: subfolder

In [2]:
# Define the file path for the subfolder to pull the raw data files from
sub_folder_raw = 'sub_folder_raw'

# The export step writes this combined file into the same folder, so it is
# skipped below; otherwise re-running the notebook would re-import its own
# output and duplicate every row
sub_combined_file = 'sub_files_raw_combined.csv'

# Create an empty list to store dataframes
dfa1 = []

# Loop through the files in the folder
# (os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the combined dataframe the same on every run)
for file in sorted(os.listdir(sub_folder_raw)):
    if not file.endswith('.csv') or file == sub_combined_file:
        continue
    file_path = os.path.join(sub_folder_raw, file)
    df = pd.read_csv(file_path)
    dfa1.append(df)

# Stop with a clear message if nothing was read
# (pd.concat on an empty list raises a less helpful ValueError)
if not dfa1:
    raise ValueError('No raw .csv files found in ' + repr(sub_folder_raw))

# Combine all dataframes
dfa2 = pd.concat(dfa1, ignore_index = True)

In [3]:
# Display the combined dataframe built from the sub folder files
dfa2

Unnamed: 0,number,letter
0,1,a
1,2,b
2,3,c
3,4,d
4,5,e
5,6,f
6,7,g
7,8,h
8,9,i
9,1,a


## Import & combine data: parent folder

In [4]:
# Define the file path for the parent folder to pull the raw data files from
parent_folder_raw = '..'

# The export step writes this combined file into the same folder, so it is
# skipped below; otherwise re-running the notebook would re-import its own
# output and duplicate every row
parent_combined_file = 'parent_files_raw_combined.csv'

# Create an empty list to store dataframes
dfb1 = []

# Loop through the files in the folder
# (os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the combined dataframe the same on every run)
for file in sorted(os.listdir(parent_folder_raw)):
    if not file.endswith('.csv') or file == parent_combined_file:
        continue
    file_path = os.path.join(parent_folder_raw, file)
    df = pd.read_csv(file_path)
    dfb1.append(df)

# Stop with a clear message if nothing was read
# (pd.concat on an empty list raises a less helpful ValueError)
if not dfb1:
    raise ValueError('No raw .csv files found in ' + repr(parent_folder_raw))

# Combine all dataframes
dfb2 = pd.concat(dfb1, ignore_index = True)

In [5]:
# Display the combined dataframe built from the parent folder files
dfb2

Unnamed: 0,number,letter
0,10,j
1,11,k
2,12,l
3,13,m
4,14,n
5,15,o
6,16,p
7,17,q
8,18,r
9,10,j


## Import & combine data: sister folder

In [6]:
# Define the file path for the sister folder to pull the raw data files from
sister_folder_raw = os.path.join('..', 'sister_folder_raw')

# The export step writes this combined file into the same folder, so it is
# skipped below; otherwise re-running the notebook would re-import its own
# output and duplicate every row
sister_combined_file = 'sister_files_raw_combined.csv'

# Create an empty list to store dataframes
dfc1 = []

# Loop through the files in the folder
# (os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the combined dataframe the same on every run)
for file in sorted(os.listdir(sister_folder_raw)):
    if not file.endswith('.csv') or file == sister_combined_file:
        continue
    file_path = os.path.join(sister_folder_raw, file)
    df = pd.read_csv(file_path)
    dfc1.append(df)

# Stop with a clear message if nothing was read
# (pd.concat on an empty list raises a less helpful ValueError)
if not dfc1:
    raise ValueError('No raw .csv files found in ' + repr(sister_folder_raw))

# Combine all dataframes
dfc2 = pd.concat(dfc1, ignore_index = True)

In [7]:
# Display the combined dataframe built from the sister folder files
dfc2

Unnamed: 0,number,letter
0,19,s
1,20,t
2,21,u
3,22,v
4,23,w
5,24,x
6,25,y
7,26,z
8,27,aa
9,19,s


## Export combined dataframes as .csv to specific directories

In [8]:
# Each combined dataframe is written as a UTF-8 .csv with a header row and
# without the dataframe index; the destination path is built first, then used

# Sub folder: write dfa2 into sub_folder_raw (inside the cwd)
sub_out_path = os.path.join('sub_folder_raw', 'sub_files_raw_combined.csv')
dfa2.to_csv(sub_out_path, index = False, header = True, encoding = 'utf-8')

# Parent folder: write dfb2 into the directory one level above the cwd
parent_out_path = os.path.join('..', 'parent_files_raw_combined.csv')
dfb2.to_csv(parent_out_path, index = False, header = True, encoding = 'utf-8')

# Sister folder: write dfc2 into sister_folder_raw, which sits in the parent directory
sister_out_path = os.path.join('..', 'sister_folder_raw', 'sister_files_raw_combined.csv')
dfc2.to_csv(sister_out_path, index = False, header = True, encoding = 'utf-8')