# Create date object, convert to column, & export with date filename

### Description

- Use when the date is defined in a single cell of an Excel template saved as a .csv file
- Read that date into a variable, convert it to a datetime, then reuse the variable to fill a date column and to build the output filename
- Useful when cleaning Excel templates that will later be combined: each record then needs the date on which its data was recorded
- Can be incorporated into a cleaning function that iterates through the files in a directory

## Import libraries

In [1]:
import pandas as pd
import numpy as np
import os

## Import data

In [2]:
# Load the raw template from the date_templates_raw subfolder
raw_path = 'date_templates_raw/date_template_raw_4Mar17.csv'
dfa1 = pd.read_csv(raw_path)

In [3]:
# Preview the raw import: the date value sits in row 0 and the real column headers in row 1
dfa1

Unnamed: 0,date,Unnamed: 1,Unnamed: 2
0,3/4/2017,,
1,number,letter,color
2,1,a,red
3,2,b,orange
4,3,c,yellow


## Define date object

In [4]:
# The template stores its date in row 0 under the 'date' header.
# Read that cell and parse its month/day/year text into a datetime value in one step.
date = pd.to_datetime(dfa1.loc[0, 'date'], format='%m/%d/%Y')

print('The date this template was filled is', date)

The date this template was filled is 2017-03-04 00:00:00


## Define headers & delete rows not needed

In [5]:
# Use the values in row index 1 (number, letter, color) as the column headers.
# NOTE(review): assigning the row Series directly also appears to carry its label
# over as the name of the column index - the stray "1" in the previews that follow.
dfa1.columns = dfa1.iloc[1]

In [6]:
# Preview: the column labels are now number/letter/color, but rows 0-1 still hold the date and header text
dfa1

1,number,letter,color
0,3/4/2017,,
1,number,letter,color
2,1,a,red
3,2,b,orange
4,3,c,yellow


In [7]:
# The date is stored in `date` and the headers are set, so the first two rows
# (date row and header-text row) are no longer needed: keep row 2 onward and renumber from 0
dfa1 = dfa1.iloc[2:].reset_index(drop=True)

In [8]:
# Preview: only the three data rows remain, re-indexed from 0
dfa1

1,number,letter,color
0,1,a,red
1,2,b,orange
2,3,c,yellow


## Add date column

In [9]:
# Check the columns and their dtypes before the date column is added
dfa1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   number  3 non-null      object
 1   letter  3 non-null      object
 2   color   3 non-null      object
dtypes: object(3)
memory usage: 204.0+ bytes


In [10]:
# Add a 'date' column holding the template date on every row
dfa1['date'] = date

# The new column is added at the end, so pull it back out and re-insert it at position 0
dfa1.insert(0, 'date', dfa1.pop('date'))

In [11]:
# Re-check the columns: 'date' should now be listed first with a datetime dtype
dfa1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    3 non-null      datetime64[ns]
 1   number  3 non-null      object        
 2   letter  3 non-null      object        
 3   color   3 non-null      object        
dtypes: datetime64[ns](1), object(3)
memory usage: 228.0+ bytes


In [12]:
# Preview the final table with the date as the first column
dfa1

1,date,number,letter,color
0,2017-03-04,1,a,red
1,2017-03-04,2,b,orange
2,2017-03-04,3,c,yellow


## Export data

In [13]:
# Format the date as YYYY-MM-DD (no time component) for use in the filename
date_string = date.strftime("%Y-%m-%d")

# Build the new file name; the same date format keeps cleaned files consistently named
file = f'cleaned_date_template_{date_string}.csv'

# Make sure the date_templates_clean subfolder exists, since to_csv does not
# create missing directories and would otherwise fail on a fresh checkout
os.makedirs('date_templates_clean', exist_ok=True)

# Save the file in the date_templates_clean subfolder (header row written, index omitted)
dfa1.to_csv(os.path.join('date_templates_clean', file), encoding='utf-8', index=False, header=True)