# Pandas version of the code in "R for Marketing Research and Analytics"

## Ch 3 - Simulating data and plotting

In [28]:
from IPython.core.interactiveshell import InteractiveShell

# pretty print only the last output of the cell
#InteractiveShell.ast_node_interactivity = "last_expr"
# pretty print all cell's output and not just the last one
InteractiveShell.ast_node_interactivity = "all"

In [21]:
import pandas as pd
import numpy as np

In [22]:
#define constants
k_stores = 20  # 20 stores, using "k_" for "constant"
k_weeks = 104   # 2 years of data each

In [32]:
# create data frame of initially missing values to hold the data
df_store = pd.DataFrame(data=np.empty(shape=(k_stores*k_weeks, 10)),
                        columns=['storNum', 'Year', 'Week', 'p1sales', 'p2sales', 'p1price','p2price','p1prom','p2prom','country'])
df_store

Unnamed: 0,storNum,Year,Week,p1sales,p2sales,p1price,p2price,p1prom,p2prom,country
0,0.0,6.614996e-319,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
df_store.shape

(2080, 10)

Create 2 series that will represent the store number and country for each observation

In [29]:
store_num = pd.Series(range(101,100+k_stores+1))
store_num
store_cty = ["US"]*3 + ["DE"]*5 + ["GB"]*3 + ["BR"]*2 + ["JP"]*4 + ["AU"]*1 + ["CN"]*2
store_cty
len(store_cty)  # make sure this is the right length

0     101
1     102
2     103
3     104
4     105
5     106
6     107
7     108
8     109
9     110
10    111
11    112
12    113
13    114
14    115
15    116
16    117
17    118
18    119
19    120
dtype: int64

['US',
 'US',
 'US',
 'DE',
 'DE',
 'DE',
 'DE',
 'DE',
 'GB',
 'GB',
 'GB',
 'BR',
 'BR',
 'JP',
 'JP',
 'JP',
 'JP',
 'AU',
 'CN',
 'CN']

20

In [40]:
np.repeat(store_num, 3)

0     101
0     101
0     101
1     102
1     102
1     102
2     103
2     103
2     103
3     104
3     104
3     104
4     105
4     105
4     105
5     106
5     106
5     106
6     107
6     107
6     107
7     108
7     108
7     108
8     109
8     109
8     109
9     110
9     110
9     110
10    111
10    111
10    111
11    112
11    112
11    112
12    113
12    113
12    113
13    114
13    114
13    114
14    115
14    115
14    115
15    116
15    116
15    116
16    117
16    117
16    117
17    118
17    118
17    118
18    119
18    119
18    119
19    120
19    120
19    120
dtype: int64

In [41]:
# now replace the appropriate column in the dataframe with those values

# DAY TODO
#   These need to fill in to the right number of rows by repeating the 20 values x times
#df_store.storNum = np.repeat(store_num, k_weeks)
df_store.storNum = store_num * k_weeks
#df_store.country = store_cty
df_store

Unnamed: 0,storNum,Year,Week,p1sales,p2sales,p1price,p2price,p1prom,p2prom,country
0,10504.0,6.614996e-319,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10608.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10712.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,10816.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10920.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,11024.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,11128.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,11232.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,11336.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,11440.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
'A B C'.split()
'A B C'.split() * 6
'1 2 3'.split() * 6

['A', 'B', 'C']

['A',
 'B',
 'C',
 'A',
 'B',
 'C',
 'A',
 'B',
 'C',
 'A',
 'B',
 'C',
 'A',
 'B',
 'C',
 'A',
 'B',
 'C']

['1',
 '2',
 '3',
 '1',
 '2',
 '3',
 '1',
 '2',
 '3',
 '1',
 '2',
 '3',
 '1',
 '2',
 '3',
 '1',
 '2',
 '3']