# Convert the provided pipe-delimited orders file to a cleaner CSV format

## Importing the needed libraries

In [1]:
import numpy as np
import pandas as pd

## Importing the *orders* data set

In [2]:
# Location of the raw orders export, relative to the notebook's working directory.
orders_path = "orders.csv"

In [4]:
# Load the raw orders export: pipe-delimited, latin1-encoded, no header row.
# Columns are addressed by position here and receive readable names below.
df_orders = pd.read_csv(
    orders_path,
    header=None,
    sep='|',
    index_col=None,
    parse_dates=[1, 5],  # positions later named 'DoB' and 'Order date'
    # NOTE(review): `infer_datetime_format` is deprecated since pandas 2.0,
    # where a single inferred format is already the default. It is kept so
    # that older pandas versions keep parsing every date with one format.
    infer_datetime_format=True,
    encoding='latin1',
    dtype={0: np.int64,
           2: object,
           3: object,
           4: np.int64,
           6: object,   # product numbers stay text, not numbers
           7: object,
           8: object,
           9: np.int64,
           10: np.float64,
           11: np.int64,
           12: np.int64,
           13: object,
           14: np.int64,
           15: np.float64}
)

# Discard the last parsed column (position 15); only the first 15 columns are
# named and exported. Presumably it is an empty column caused by a trailing
# '|' on each line -- verify against the raw file.
# Reassigning (instead of `inplace=True`) keeps the step explicit.
df_orders = df_orders.drop(columns=df_orders.columns[-1])

# The former bare `df_orders.reset_index()` call was removed: it returned a new
# frame that was never assigned, so it had no effect on `df_orders`.

df_orders.columns = ['Customer number', 'DoB', 'Gender', 'PoR', 'Order number',
           'Order date', 'Product number', 'Sub category', 'Category',
           'Count', 'Price', 'EDT', 'ADT', 'RoR', 'Rating']

   Customer number        DoB Gender            PoR  Order number Order date  \
0          1063418 1944-01-15  Woman    Bloemendaal       1251137 2014-12-20   
1          1062078 1934-01-17    Man        De Bilt       1658762 2017-11-03   
2          1007060 1947-01-14    Man     Doetinchem        106526 2013-06-08   
3          1063418 1944-01-15  Woman    Bloemendaal       1251137 2014-12-20   
4          1063290 1971-01-08  Woman  Franekeradeel        439149 2015-01-06   

  Product number        Sub category           Category  Count    Price  EDT  \
0  5146308036084       Garden chairs   Garden furniture      1  1090.82    2   
1  8119187109467             Torches     Garden heating      6   385.92    4   
2  6110364066490        Leaf blowers     Garden reamers      2   173.78    3   
3  8172375031575                Hoes  Garden hand tools      4    45.64    5   
4  8111132296154  Insects and vermin            Control      1   114.24    1   

   ADT                RoR  Rating  
0 

In [5]:
# Display the dtype of every column, e.g. to check that 'DoB' and 'Order date'
# were parsed as datetime64 and the numeric columns got the requested types.
df_orders.dtypes

Customer number             int64
DoB                datetime64[ns]
Gender                     object
PoR                        object
Order number                int64
Order date         datetime64[ns]
Product number             object
Sub category               object
Category                   object
Count                       int64
Price                     float64
EDT                         int64
ADT                         int64
RoR                        object
Rating                      int64
dtype: object

In [6]:
# Render both date columns as day-month-year text for the exported file.
#
# `pd.to_datetime` makes the cell safe to re-run: on the first run the columns
# are already datetime64 and pass through unchanged; on a re-run they hold
# "%d-%m-%Y" strings, which are parsed back before being formatted again.
# Values in any other format still raise instead of being silently exported.
DATE_FORMAT = "%d-%m-%Y"
for date_col in ("DoB", "Order date"):
    df_orders[date_col] = (
        pd.to_datetime(df_orders[date_col], format=DATE_FORMAT)
        .dt.strftime(DATE_FORMAT)
    )

In [7]:
# Plain-text preview of the first five rows (five is the default for `head`).
print(df_orders.head())

   Customer number         DoB Gender            PoR  Order number  \
0          1063418  15-01-1944  Woman    Bloemendaal       1251137   
1          1062078  17-01-1934    Man        De Bilt       1658762   
2          1007060  14-01-1947    Man     Doetinchem        106526   
3          1063418  15-01-1944  Woman    Bloemendaal       1251137   
4          1063290  08-01-1971  Woman  Franekeradeel        439149   

   Order date Product number        Sub category           Category  Count  \
0  20-12-2014  5146308036084       Garden chairs   Garden furniture      1   
1  03-11-2017  8119187109467             Torches     Garden heating      6   
2  08-06-2013  6110364066490        Leaf blowers     Garden reamers      2   
3  20-12-2014  8172375031575                Hoes  Garden hand tools      4   
4  06-01-2015  8111132296154  Insects and vermin            Control      1   

     Price  EDT  ADT                RoR  Rating  
0  1090.82    2    2                NaN      -1  
1   385.92

In [9]:
# Columns written to the output file, in this order.
export_columns = [
    'Customer number', 'DoB', 'Gender', 'PoR', 'Order number',
    'Order date', 'Product number', 'Sub category', 'Category',
    'Count', 'Price', 'EDT', 'ADT', 'RoR', 'Rating',
]

# Save the converted orders without the row index.
df_orders.to_csv('orders_edited.csv', columns=export_columns, index=False)