# From one dataset to many

**Load the data**

In [59]:
import pandas as pd
data = pd.read_csv('../data/fake_beiersdorf_data.csv')
data.shape

(5000, 9)

In [60]:
data.head()

Unnamed: 0,Date,Var1,Var2,Var3,Name,Address,Zip-code,City,Country
0,2011-11-20,0.764554,completed,True,Beiersdorf Customer Supply GmbH,Unnastrasse 48,20253,Hamburg,Germany
1,2006-11-27,0.380247,in progress,False,Beiersdorf Shared Services GmbH,Quickbornstrasse 24,20253,Hamburg,Germany
2,2007-09-29,0.807108,completed,False,Beiersdorf Customer Supply GmbH,Unnastrasse 48,20253,Hamburg,Germany
3,2006-09-05,0.649908,in progress,True,Beiersdorf Manufacturing Hamburg GmbH,Troplowitzstrasse 10,22529,Hamburg,Germany
4,2014-12-12,0.951206,in progress,True,Beiersdorf Shared Services GmbH,Quickbornstrasse 24,20253,Hamburg,Germany


## Split large dataset into many small ones based on the variable `Name`

In [61]:
data.Name

0              Beiersdorf Customer Supply GmbH
1              Beiersdorf Shared Services GmbH
2              Beiersdorf Customer Supply GmbH
3        Beiersdorf Manufacturing Hamburg GmbH
4              Beiersdorf Shared Services GmbH
5              Beiersdorf Customer Supply GmbH
6              Beiersdorf Customer Supply GmbH
7              Beiersdorf Shared Services GmbH
8            La Prairie Group Deutschland GmbH
9              Beiersdorf Shared Services GmbH
10                               Beiersdorf AG
11      Beiersdorf Manufacturing Waldheim GmbH
12                               Beiersdorf AG
13      Beiersdorf Manufacturing Waldheim GmbH
14           La Prairie Group Deutschland GmbH
15       Beiersdorf Manufacturing Hamburg GmbH
16             Beiersdorf Customer Supply GmbH
17      Beiersdorf Manufacturing Waldheim GmbH
18        Beiersdorf Manufacturing Berlin GmbH
19        Beiersdorf Manufacturing Berlin GmbH
20             Beiersdorf Customer Supply GmbH
21           

In [62]:
data.Name.unique()

array(['Beiersdorf Customer Supply GmbH',
       'Beiersdorf Shared Services GmbH',
       'Beiersdorf Manufacturing Hamburg GmbH',
       'La Prairie Group Deutschland GmbH', 'Beiersdorf AG',
       'Beiersdorf Manufacturing Waldheim GmbH',
       'Beiersdorf Manufacturing Berlin GmbH'], dtype=object)

In [63]:
data.Name.nunique()

7

In [64]:
for name in data.Name.unique():
    print(f'Computing {name} ...')
    subset = data.loc[data.Name == name]

Computing Beiersdorf Customer Supply GmbH ...
Computing Beiersdorf Shared Services GmbH ...
Computing Beiersdorf Manufacturing Hamburg GmbH ...
Computing La Prairie Group Deutschland GmbH ...
Computing Beiersdorf AG ...
Computing Beiersdorf Manufacturing Waldheim GmbH ...
Computing Beiersdorf Manufacturing Berlin GmbH ...


In [65]:
subset.sample(10)

Unnamed: 0,Date,Var1,Var2,Var3,Name,Address,Zip-code,City,Country
392,2017-02-02,0.900758,completed,True,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
2370,2016-04-25,0.777162,completed,False,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
1344,2006-07-03,0.969716,completed,False,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
4519,2014-08-14,0.529614,completed,True,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
3932,2015-04-25,0.65668,completed,False,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
3298,2015-10-01,0.749431,in progress,False,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
1290,2018-03-31,0.564161,in progress,False,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
4967,2009-02-18,0.095276,completed,True,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
4131,2006-06-14,0.590672,in progress,False,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
3843,2015-11-03,0.121099,in progress,False,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany


In [66]:
import os
for name in data.Name.unique():
    print(f'Computing {name} ...')
    subset = data.loc[data.Name == name]
    fname = f'subset_{name.replace(" ", "_")}.csv'
    filepath = os.path.join('..', 'data', 'interim', fname)
    subset.to_csv(filepath)

Computing Beiersdorf Customer Supply GmbH ...
Computing Beiersdorf Shared Services GmbH ...
Computing Beiersdorf Manufacturing Hamburg GmbH ...
Computing La Prairie Group Deutschland GmbH ...
Computing Beiersdorf AG ...
Computing Beiersdorf Manufacturing Waldheim GmbH ...
Computing Beiersdorf Manufacturing Berlin GmbH ...


***

# From many to one

In [67]:
!ls ../data/interim/

subset_Beiersdorf_AG.csv
subset_Beiersdorf_Customer_Supply_GmbH.csv
subset_Beiersdorf_Manufacturing_Berlin_GmbH.csv
subset_Beiersdorf_Manufacturing_Hamburg_GmbH.csv
subset_Beiersdorf_Manufacturing_Waldheim_GmbH.csv
subset_Beiersdorf_Shared_Services_GmbH.csv
subset_La_Prairie_Group_Deutschland_GmbH.csv


In [68]:
import glob

In [69]:
filelist = glob.glob(os.path.join('..', 'data', 'interim', '*.csv'))
filelist

['../data/interim/subset_La_Prairie_Group_Deutschland_GmbH.csv',
 '../data/interim/subset_Beiersdorf_Manufacturing_Berlin_GmbH.csv',
 '../data/interim/subset_Beiersdorf_Customer_Supply_GmbH.csv',
 '../data/interim/subset_Beiersdorf_AG.csv',
 '../data/interim/subset_Beiersdorf_Manufacturing_Waldheim_GmbH.csv',
 '../data/interim/subset_Beiersdorf_Shared_Services_GmbH.csv',
 '../data/interim/subset_Beiersdorf_Manufacturing_Hamburg_GmbH.csv']

In [70]:
dfs = []
for f in filelist:
    data = pd.read_csv(f, index_col=0)
    dfs.append(data)
fulldata = pd.concat(dfs)
fulldata.shape

(5000, 9)

In [71]:
fulldata.sample(10)

Unnamed: 0,Date,Var1,Var2,Var3,Name,Address,Zip-code,City,Country
2674,2012-03-17,0.263135,completed,False,La Prairie Group Deutschland GmbH,Lange Straße 65,76530,Baden-BadenHamburg,Germany
1628,2005-12-21,0.867838,completed,False,La Prairie Group Deutschland GmbH,Lange Straße 65,76530,Baden-BadenHamburg,Germany
4072,2011-06-25,0.52032,in progress,False,Beiersdorf Manufacturing Hamburg GmbH,Troplowitzstrasse 10,22529,Hamburg,Germany
973,2017-02-09,0.508332,completed,False,La Prairie Group Deutschland GmbH,Lange Straße 65,76530,Baden-BadenHamburg,Germany
4292,2007-12-03,0.968425,completed,True,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
1136,2012-03-21,0.757894,completed,True,Beiersdorf Manufacturing Hamburg GmbH,Troplowitzstrasse 10,22529,Hamburg,Germany
3154,2009-01-12,0.505514,in progress,False,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
4685,2005-02-22,0.453154,completed,False,Beiersdorf Customer Supply GmbH,Unnastrasse 48,20253,Hamburg,Germany
3519,2012-08-31,0.831553,completed,True,Beiersdorf AG,Unnastrasse 48,20253,Hamburg,Germany
229,2014-08-06,0.911588,in progress,True,Beiersdorf Manufacturing Waldheim GmbH,Am Eichberg,4736,Waldheim,Germany


***