In [13]:
import csv
import json
from datetime import date, datetime

# Relative path of the raw customer CSV export that the cells below read.
CUSTOMERS_CSV = 'acw_user_data (1).csv'

In [5]:
# List the CSV column headers so the available fields can be inspected.
# The file is decoded explicitly as UTF-8: relying on the platform default
# encoding garbles non-ASCII header text such as the pound sign in
# 'Yearly Salary (£)'. newline='' is what the csv module expects for files
# it parses.
with open(CUSTOMERS_CSV, encoding='utf-8', newline='') as f:
    reader = csv.DictReader(f)
    # fieldnames is None for an empty file; fall back to an empty list.
    for field_name in reader.fieldnames or []:
        print(field_name)

Address Street
Address City
Address Postcode
Age (Years)
Distance Commuted to Work (miles)
Employer Company
Credit Card Start Date
Credit Card Expiry Date
Credit Card Number
Credit Card CVV
Dependants
First Name
Bank IBAN
Last Name
Marital Status
Yearly Pension (Â£)
Retired
Yearly Salary (Â£)
Sex
Vehicle Make
Vehicle Model
Vehicle Year
Vehicle Type


- Name:
  - First Name: `str`
  - Last Name: `str`
- Age (Years): `int`
- Sex: `str` (Male/Female)
- Marital Status: `str` (single/married or civil partner/divorced/widowed)
- Dependants: `int` & '' -> 0
- Address:
  - Address Street: `str`
  - Address City: `str`
  - Address Postcode: `str`
- Credit Card:
  - Credit Card Start Date `date` %m/%y
  - Credit Card Expiry Date `date` %m/%y
  - Credit Card Number `int`
  - Credit Card CVV `int`
  - Bank IBAN `str`
- Employment: 
  - Retired: `bool` (true/false)
    - Yearly Pension (£) `int` pounds
  - Employer Company `str` & 'N/A' -> `None`(null)
  - Yearly Salary (£) `int` pounds
  - Distance Commuted to Work (miles) `float`
- Vehicle:
  - Vehicle Make `str`
  - Vehicle Model `str`
  - Vehicle Year `int`
  - Vehicle Type `list` (array - using split)

In [27]:
def from_csv(csvfile):
    """Read the customer CSV and return one nested dict per row.

    Args:
        csvfile: Path to a UTF-8 encoded CSV file whose header row contains
            the column names looked up below (e.g. 'First Name',
            'Credit Card Start Date', 'Yearly Salary (£)').

    Returns:
        A list of dicts, one per CSV row, each with the keys 'name', 'age',
        'sex', 'marital_status', 'dependants', 'address', 'credit_card',
        'employment' and 'vehicle'.

    Raises:
        KeyError: If an expected column is missing from the file.
        ValueError: If a numeric or date cell cannot be converted.
    """

    def make_int(value):
        """Convert a cell to int, treating '' and 'N/A' as 0."""
        return 0 if value in ('', 'N/A') else int(value)

    def make_bool(value):
        """Interpret a 'True'/'False' cell (any letter case) as a bool.

        bool() cannot be used here: every non-empty string, including
        'False', is truthy.
        """
        return value.strip().lower() == 'true'

    def none_if_na(value):
        """Map the 'N/A' placeholder to None (JSON null)."""
        return None if value == 'N/A' else value

    def month_year(value):
        """Split a 'month/year' cell into its two integer components."""
        parts = value.split('/')
        return {'month': int(parts[0]), 'year': int(parts[1])}

    customer_data = []

    # newline='' lets the csv module handle line endings itself, which it
    # needs in order to parse quoted fields containing newlines correctly.
    with open(csvfile, encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            customer_data.append({
                'name': {
                    'first': row['First Name'],
                    'last': row['Last Name']
                },
                'age': make_int(row['Age (Years)']),
                'sex': row['Sex'],
                'marital_status': row['Marital Status'],
                'dependants': make_int(row['Dependants']),
                'address': {
                    'street': row['Address Street'],
                    'city': row['Address City'],
                    'postcode': row['Address Postcode']
                },
                'credit_card': {
                    'start_date': month_year(row['Credit Card Start Date']),
                    'end_date': month_year(row['Credit Card Expiry Date']),
                    'number': int(row['Credit Card Number']),
                    'cvv': int(row['Credit Card CVV']),
                    'iban': row['Bank IBAN']
                },
                'employment': {
                    'retired': make_bool(row['Retired']),
                    'yearly_pension': make_int(row['Yearly Pension (£)']),
                    'employer': none_if_na(row['Employer Company']),
                    'yearly_salary': make_int(row['Yearly Salary (£)']),
                    'commute_distance': float(
                        row['Distance Commuted to Work (miles)'])
                },
                'vehicle': {
                    'type': [item.strip()
                             for item in row['Vehicle Type'].split(',')],
                    'year': make_int(row['Vehicle Year']),
                    'make': row['Vehicle Make'],
                    'model': row['Vehicle Model']
                }
            })
    return customer_data

In [28]:
# Convert the customer CSV into nested records and save them as
# two-space-indented JSON.
with open('output/processed.json', mode='w', encoding='utf-8') as json_out:
    customers = from_csv(CUSTOMERS_CSV)
    serialized = json.dumps(customers, indent=2)
    json_out.write(serialized)