In [77]:
import numpy as np
import pandas as pd
import datetime as dt


In [78]:
# set a seed for reproducibility
np.random.seed(0)
random_values = np.random.rand(10)
print(random_values)

[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152]


In [79]:
random_serie = pd.Series(random_values, name="random_values")
random_serie

0    0.548814
1    0.715189
2    0.602763
3    0.544883
4    0.423655
5    0.645894
6    0.437587
7    0.891773
8    0.963663
9    0.383442
Name: random_values, dtype: float64

In [80]:
# Convert serie to data frame
random_df = random_serie.to_frame()
random_df

Unnamed: 0,random_values
0,0.548814
1,0.715189
2,0.602763
3,0.544883
4,0.423655
5,0.645894
6,0.437587
7,0.891773
8,0.963663
9,0.383442


### Create a Time series data frame using map

- Likert scale:  Strongly disagree, Disagree, Neither agree nor disagree, Agree, Strongly agree.
- Class standing: Freshman, sophomore, junior, senior
- Socioeconomic standing: Lower, middle, and upper class
- Quality: Very high, high, medium, low, very low
- Condition: Excellent, Very good, Good, Bad, Very bad

In [81]:
# Create pandas data frame
data_points = 100
conditions = ['excellent', 'very good', 'good', 'bad', 'very bad']
quality = ['veryhigh', 'high', 'medium', 'low', 'very low']
socials = ['low', 'middle', 'upper class']
years = ['freshman', 'sophomore', 'junior', 'senior']
opinions = ['disagree+', 'disagree', 'neutral', 'agree', 'agree+']

start_date = dt.date.today()
np.random.seed(data_points)

random_df = pd.DataFrame(
  {
    "value": np.random.rand(data_points),
    "conditions": [ np.random.choice(conditions) for _ in range(data_points) ],
    "quality": [np.random.choice(quality) for _ in range(data_points)],
    "socials": [np.random.choice(socials) for _ in range(data_points)],
    "years": [np.random.choice(years) for _ in range(data_points)],
    "opinions": [np.random.choice(opinions) for _ in range(data_points)],
  },
  
  index=pd.date_range(end=start_date, freq='1D',
                      periods=data_points, name='date')
)
random_df


Unnamed: 0_level_0,value,conditions,quality,socials,years,opinions
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-10-25,0.543405,very good,low,upper class,senior,neutral
2021-10-26,0.278369,excellent,very low,middle,senior,neutral
2021-10-27,0.424518,very bad,medium,upper class,junior,agree
2021-10-28,0.844776,good,veryhigh,middle,freshman,agree+
2021-10-29,0.004719,excellent,high,upper class,sophomore,disagree
...,...,...,...,...,...,...
2022-01-28,0.658940,excellent,high,upper class,sophomore,agree
2022-01-29,0.254258,bad,very low,upper class,junior,disagree+
2022-01-30,0.641101,excellent,low,low,freshman,disagree+
2022-01-31,0.200124,very good,high,upper class,junior,disagree+


In [82]:
random_df.describe()

Unnamed: 0,value
count,100.0
mean,0.472367
std,0.290685
min,0.004719
25%,0.233195
50%,0.428111
75%,0.685887
max,0.992158


In [83]:
print(random_df.conditions.value_counts())
print(random_df.conditions.unique())

good         26
very good    20
excellent    19
very bad     18
bad          17
Name: conditions, dtype: int64
['very good' 'excellent' 'very bad' 'good' 'bad']


### Create random data frame using list

In [84]:
states = ['checkout', 'address', 'shipment', 'payment', 'complete']
payments = [ 'completed', 'void', 'balanced_due', 'credit_owed']

zip_data = zip(
    np.random.choice(states, size=data_points),
    np.random.choice(payments, size=data_points),
    np.random.rand(1, data_points)[0],
    pd.date_range(end=dt.date.today(), freq='1D', periods=data_points)
)
np.random.choice(states, size=data_points)
np.random.rand(1, data_points)[0]
pd.date_range(end=dt.date.today(), freq='1D', periods=data_points)

data = list(zip_data)
list_df = pd.DataFrame(data=list(data), columns=[
             'state', 'payment', 'value', 'date'])

list_df.set_index('date')


Unnamed: 0_level_0,state,payment,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-10-25,checkout,balanced_due,0.093966
2021-10-26,checkout,balanced_due,0.116140
2021-10-27,payment,balanced_due,0.231396
2021-10-28,address,void,0.886830
2021-10-29,shipment,credit_owed,0.869947
...,...,...,...
2022-01-28,payment,credit_owed,0.926213
2022-01-29,complete,completed,0.126568
2022-01-30,shipment,credit_owed,0.406643
2022-01-31,shipment,credit_owed,0.288605
