### Generate Random Users

In [1]:
import numpy as np
import pandas as pd

# Read the pool of candidate names from a headerless text file into a
# single-column frame whose only column is called 'name'
# (presumably one full name per line -- confirm against data/names.txt).
raw_names = pd.read_table(
    'data/names.txt',
    names=['name'],
    header=None,
)
# Number of synthetic users to generate.
users_count = 10
# Activity labels a user can perform. Each label is stripped because splitting
# on ',' alone keeps the space that follows every comma in the literal
# (e.g. ' Brought' instead of 'Brought').
activities = [
    label.strip()
    for label in "Placed Order, Brought, Dropped, Commented, Canceled".split(',')
]
# Trim surrounding whitespace from every name, then sample `users_count` of
# them. np.random.choice samples with replacement by default, so the same name
# can be picked more than once.
# NOTE: this rebinds `raw_names` from the loaded DataFrame to the sampled array.
stripped_pool = raw_names['name'].str.strip()
raw_names = np.random.choice(stripped_pool, size=users_count)
del stripped_pool  # temporary only; keep the notebook namespace unchanged

# Ids are consecutive integers 1..users_count, paired positionally with names.
users_dict = dict(
    id=range(1, users_count + 1),
    name=raw_names,
)

users = pd.DataFrame(users_dict)
users.head()

Unnamed: 0,id,name
0,1,Roselee Marland
1,2,Celinda Grindstaff
2,3,Jim Blasi
3,4,Brian Mazzone
4,5,Evelin Tafoya


### Generate Activities

In [2]:
def rand_activity(i):
    """Return one label drawn at random from the module-level `activities`.

    The argument `i` is ignored; it only exists so the function can be mapped
    over a sequence of dummy indices (one independent draw per element).
    """
    picked = np.random.choice(activities)
    return picked

# Number of activity rows to simulate.
activities_count = 15

# rand_activity ignores its argument, so vectorizing it over a dummy index
# range simply yields one independent random draw per element.
vect = np.vectorize(rand_activity)
rand_activities = vect(range(activities_count))

# Draw ids from 1..users_count (the id range assigned to `users`) rather than
# a hard-coded 1..10, so every activity keeps referring to a generated user
# even if users_count is changed. Draws are with replacement: one user may get
# several activities while another gets none.
rand_ids = np.random.choice(range(1, users_count + 1), activities_count)

user_activities = pd.DataFrame({
    'id': rand_ids,
    'activity': rand_activities,
})

user_activities.head()

Unnamed: 0,id,activity
0,10,Dropped
1,4,Commented
2,9,Canceled
3,4,Dropped
4,3,Dropped


### Join Users and Activity

In [3]:
# With neither `on` nor `how` given, merge joins on the column names the two
# frames share (here only `id`) and performs an inner join, so users without
# any activity are dropped from the result.
users.merge(user_activities)

Unnamed: 0,id,name,activity
0,2,Celinda Grindstaff,Dropped
1,2,Celinda Grindstaff,Commented
2,3,Jim Blasi,Dropped
3,4,Brian Mazzone,Commented
4,4,Brian Mazzone,Dropped
5,4,Brian Mazzone,Brought
6,5,Evelin Tafoya,Commented
7,5,Evelin Tafoya,Dropped
8,7,Syreeta Mcmanus,Placed Order
9,8,Syreeta Mcmanus,Placed Order


In [4]:
# Left join: every row of `users` is kept; users with no matching activity get
# a missing value in `activity`. `how` may also be 'right', 'outer', or the
# default 'inner'.
users.merge(user_activities, how='left')

Unnamed: 0,id,name,activity
0,1,Roselee Marland,
1,2,Celinda Grindstaff,Dropped
2,2,Celinda Grindstaff,Commented
3,3,Jim Blasi,Dropped
4,4,Brian Mazzone,Commented
5,4,Brian Mazzone,Dropped
6,4,Brian Mazzone,Brought
7,5,Evelin Tafoya,Commented
8,5,Evelin Tafoya,Dropped
9,6,Nelida Morey,


### Many To Many
- In a many-to-many merge, if the left table has n records and the right table has m records for a given key, the result will have n*m records for that key.

In [5]:
# Build a small random table whose `temperature` and `color` columns are drawn
# from tiny value sets, so repeated (non-unique) key values are likely.
data_len = 10
temperature = np.random.choice(range(33, 38), size=data_len)  # ints in 33..37
color = np.random.choice("red,blue,yellow,orange".split(","), size=data_len)
# Standard-normal draws scaled by 6 and shifted by 18.
energy = 18 + 6 * np.random.randn(data_len)

temp_dict = dict(temperature=temperature, color=color, energy=energy)

df_temperature = pd.DataFrame(temp_dict)
df_temperature



Unnamed: 0,temperature,color,energy
0,36,yellow,10.083525
1,37,yellow,28.067964
2,36,red,11.129831
3,36,red,15.725133
4,36,yellow,10.496243
5,34,yellow,21.938177
6,36,orange,28.631699
7,36,blue,10.108891
8,37,yellow,24.200041
9,36,yellow,25.123806


In [6]:
# Second random table sharing the `temperature` and `color` column names with
# df_temperature, plus its own `weight` measurement.
# NOTE: this rebinds `data_len`, `temperature` and `color` from the previous cell.
data_len = 10
temperature = np.random.choice(range(33, 38), size=data_len)  # ints in 33..37
color = np.random.choice("red,blue,yellow,orange".split(","), size=data_len)
# Standard-normal draws scaled by 3 and shifted by 8.
weight = 8 + 3 * np.random.randn(data_len)

weight_dict = dict(temperature=temperature, color=color, weight=weight)

df_weight = pd.DataFrame(weight_dict)
df_weight

Unnamed: 0,temperature,color,weight
0,37,red,8.397335
1,34,yellow,6.860275
2,35,orange,8.344647
3,34,yellow,10.68434
4,34,blue,7.582409
5,36,yellow,12.293538
6,35,yellow,6.908905
7,37,yellow,9.679595
8,37,blue,7.804843
9,34,blue,6.659487


In [7]:
# With no keys given, merge joins on every column name the two frames share
# (here `temperature` and `color`). Only the last expression of the cell is
# displayed; this first call is shown for comparison.
df_temperature.merge(df_weight)
## is equivalent to naming the key columns explicitly on each side
df_temperature.merge(
    df_weight,
    left_on=["temperature", "color"],
    right_on=["temperature", "color"],
)

Unnamed: 0,temperature,color,energy,weight
0,36,yellow,10.083525,12.293538
1,36,yellow,10.496243,12.293538
2,36,yellow,25.123806,12.293538
3,37,yellow,28.067964,9.679595
4,37,yellow,24.200041,9.679595
5,34,yellow,21.938177,6.860275
6,34,yellow,21.938177,10.68434


In [8]:
# Joining on `color` alone leaves `temperature` present in both frames, so
# pandas disambiguates the two copies with its default suffixes `_x` / `_y`.
display(df_temperature.merge(df_weight, on="color").head())
# Same join, with custom suffixes for the overlapping `temperature` column.
df_temperature.merge(df_weight, on="color", suffixes=("_left", "_right")).head()

Unnamed: 0,temperature_x,color,energy,temperature_y,weight
0,36,yellow,10.083525,34,6.860275
1,36,yellow,10.083525,34,10.68434
2,36,yellow,10.083525,36,12.293538
3,36,yellow,10.083525,35,6.908905
4,36,yellow,10.083525,37,9.679595


Unnamed: 0,temperature_left,color,energy,temperature_right,weight
0,36,yellow,10.083525,34,6.860275
1,36,yellow,10.083525,34,10.68434
2,36,yellow,10.083525,36,12.293538
3,36,yellow,10.083525,35,6.908905
4,36,yellow,10.083525,37,9.679595
