In [1]:
from fablr import dataframes as d
from fablr.sample_assets.artists import artists as artists_list

In [2]:
from fablr.extended_providers import sample_dataframe_provider, sample_list_provider

In [3]:
# Shared generator for the cells below. The seed is pinned, presumably so the
# generated frames are repeatable between runs (TODO confirm against fablr).
SEED = 123

gen = d.Fablr()
gen.set_seed(SEED)

In [4]:
from datetime import datetime as dt

# strptime layout shared by the date-range specs in this notebook.
date_format = '%Y-%m-%d'

# Column spec for the synthetic users table: one entry per output column, each naming
# a provider and the keyword arguments passed along with it.
users_dict = {
  # Bounds are written as ints rather than 2e5. If 'random_int' resolves to Faker's
  # random_int (TODO confirm), the bounds end up in random.randrange, which raises
  # TypeError for float arguments on Python 3.12+.
  'user_id': {'provider':'random_int', 'kwargs':{"min":0, "max": 200_000}},
  'first_name': {'provider': 'first_name', 'kwargs': {}},
  'last_name': {'provider': 'last_name', 'kwargs': {}},
  'email': {'provider': 'email', 'kwargs': {}},
  # Last login falls between 2023-01-01 and 2023-12-01.
  'last_login': {'provider': 'date_between_dates',
                 'kwargs': {'date_start': dt.strptime('2023-01-01', date_format), 'date_end': dt.strptime('2023-12-01', date_format)}},
  # Tier is drawn from a two-value list; 'unique': False is passed in the provider kwargs.
  'subscription_tier' : {'provider': 'sample_list',
                         'kwargs':{'list':["Prime", "Standard"], 'unique': False}}
}

# 100,000 rows with user_id named as the primary key -- presumably this makes fablr
# enforce uniqueness on that column (verify against the fablr documentation).
users_df = gen.generate_dataframe(100000, users_dict, primary_keys=['user_id'])
users_df.head(3)


       user_id first_name  last_name                      email  last_login  \
0        13726     Joseph       Mack       daniel06@example.net  2023-08-07   
1        89338    Melissa      Moore          ann32@example.org  2023-06-19   
2       156657   Nicholas      Smith        cgarcia@example.org  2023-09-17   
3        33095     Kristy      Smith         qberry@example.com  2023-11-22   
4        79979    Matthew      Solis     lorifowler@example.net  2023-11-11   
...        ...        ...        ...                        ...         ...   
99995   113628    Michael      Jones     youngbryan@example.org  2023-08-21   
99996    99887     Ashley    Herrera  kimberlyellis@example.net  2023-09-26   
99997   142397      James      Adams    trevinoevan@example.net  2023-05-11   
99998   144714    Rebecca  Middleton  holmeskenneth@example.org  2023-03-26   
99999    40949       John      Lopez     taylormark@example.net  2023-02-09   

      subscription_tier  
0              Standard  

Unnamed: 0,user_id,first_name,last_name,email,last_login,subscription_tier
0,13726,Joseph,Mack,daniel06@example.net,2023-08-07,Standard
1,89338,Melissa,Moore,ann32@example.org,2023-06-19,Prime
2,156657,Nicholas,Smith,cgarcia@example.org,2023-09-17,Prime


In [7]:
# Count the distinct user_id values; compare with the number of rows requested
# when users_df was generated to confirm the key column holds no repeats.
len(users_df["user_id"].drop_duplicates())

100000

In [5]:
# Events may fall anywhere between 2019-01-01 and 2023-12-01.
event_date_range = {
    'date_start': dt.strptime('2019-01-01', date_format),
    'date_end': dt.strptime('2023-12-01', date_format),
}

# Weighted outcome pool: 91 entries of one value and 9 of the other, so a uniform
# draw from the list yields roughly a 91/9 split.
# NOTE(review): 'succesfull' is misspelled; left as-is because it is a generated data value.
event_outcomes = ['succesfull'] * 91 + ['cancelled'] * 9

# Column spec for the synthetic events table (provider name + kwargs per column).
events_dict = {
    'event_id': {
        'provider': 'bothify',
        'kwargs': {'text': 'eid_????-#####'},
    },
    'event_name': {
        'provider': 'sample_list',
        'kwargs': {'list': artists_list},
    },
    'event_date': {
        'provider': 'date_between_dates',
        'kwargs': event_date_range,
    },
    'event_location': {
        'provider': 'city',
        'kwargs': {},
    },
    'event_result': {
        'provider': 'sample_list',
        'kwargs': {'list': event_outcomes},
    },
}

events_df = gen.generate_dataframe(800, events_dict)
events_df.head()

           event_id        event_name  event_date  event_location event_result
0    eid_IrHr-19008          The Cure  2021-11-28    Pricechester   succesfull
1    eid_WUEo-33846           The Jam  2023-06-04    Hartmanshire   succesfull
2    eid_HzPQ-32700   The Stone Roses  2023-07-09     Jimenezport   succesfull
3    eid_WsPj-10149  Blue Oyster Cult  2019-04-24  Fernandezburgh   succesfull
4    eid_LxZU-54142           The Jam  2022-05-19    South Daniel   succesfull
..              ...               ...         ...             ...          ...
795  eid_BRQd-29432       The Beatles  2020-12-22       New Jason   succesfull
796  eid_iWpP-40480          The Cure  2023-04-30  Kruegerchester   succesfull
797  eid_ianu-65117         Radiohead  2020-01-04       East Anna   succesfull
798  eid_ofxG-41112      Led Zeppelin  2021-03-27   Hernandezstad   succesfull
799  eid_uiPI-07345             Oasis  2022-09-15  East Derekport   succesfull

[800 rows x 5 columns]


KeyError: "['hash'] not found in axis"

In [None]:
# Column spec for the synthetic tickets table. event_id and user_id are sampled from
# the previously generated events_df / users_df so tickets reference existing rows.
tickets_dict = {
    # The upper bound is an int rather than 1e10. If 'random_int' resolves to Faker's
    # random_int (TODO confirm), the bounds end up in random.randrange, which raises
    # TypeError for float arguments on Python 3.12+.
    'ticket_no': {'provider':'random_int', 'unique': True, 'kwargs':{"min":0, "max": 10_000_000_000}},
    'event_id': {'provider':'sample_dataframe', 'kwargs': {'df': events_df, 'column': 'event_id'}},
    'user_id': {'provider':'sample_dataframe', 'kwargs': {'df': users_df, 'column': 'user_id'}},
    'total_charged': {'provider': 'random_float', 'kwargs': {'min': 20, 'max': 800}},
    # Weighted pool of surcharges: 20 x 5.99, 15 x 11.99 and 15 x 0.
    'surcharge': {'provider': 'sample_list', 'kwargs': {'list': [5.99]*20 + [11.99]*15 + [0]*15}},
}

# 1,000 tickets spread over the generated events and users.
tickets_df = gen.generate_dataframe(1000, tickets_dict)
tickets_df.head()
    

Unnamed: 0,ticket_no,event_id,user_id,total_charged,surcharge
0,8863622619,eid_pJba-23173,7461881110,208.4,5.99
1,2595583641,eid_ieVK-40945,901424657,791.01,0.0
2,6345534774,eid_msLY-41277,6523944977,132.29,5.99
3,8374337030,eid_rKKk-64647,3989241120,290.97,0.0
4,710922538,eid_sqKp-64982,7592083808,631.23,11.99


In [None]:

# Index the tickets by event so they can be attached to the events frame.
tickets_by_event = tickets_df.set_index('event_id')

# Left join: every event row is kept, including events that sold no tickets.
events_with_tickets = events_df.join(tickets_by_event, on='event_id', how='left')

# Total amount charged per event_name (the artist list supplies the names).
total_rev_by_artist_df = (
    events_with_tickets
    .groupby('event_name', as_index=False)['total_charged']
    .sum()
)
total_rev_by_artist_df.head()

Unnamed: 0,event_name,total_charged
0,Aerosmith,1633.47
1,Arctic Monkeys,13992.44
2,Blink-182,4323.42
3,Blue Oyster Cult,2228.04
4,Blur,4576.53


In [None]:
from datetime import datetime as dt
import pandas as pd

# Rebuild the generator and re-apply the seed before generating the fixture frame.
gen = d.Fablr()
gen.set_seed(123)

# Date window used by col_3: 2019-01-01 through 2023-12-01.
test_date_window = {
    'date_start': dt.strptime('2019-01-01', date_format),
    'date_end': dt.strptime('2023-12-01', date_format),
}

# Four-column fixture spec:
#   col_1 - 4 candidate values, 'unique': False set on the column entry
#   col_2 - 10 candidate letters, no uniqueness flag
#   col_3 - dates inside test_date_window
#   col_4 - 10 candidate values, 'unique': True passed in the provider kwargs
test_dict = {
    'col_1': {
        'provider': 'sample_list',
        'kwargs': {'list': list(range(1, 5))},
        'unique': False,
    },
    'col_2': {
        'provider': 'sample_list',
        'kwargs': {'list': list("abcdefghij")},
    },
    'col_3': {
        'provider': 'date_between_dates',
        'kwargs': test_date_window,
    },
    'col_4': {
        'provider': 'sample_list',
        'kwargs': {'list': list(range(10, 101, 10)), 'unique': True},
    },
}

test_df = gen.generate_dataframe(10, test_dict)

In [None]:
## Sanity checks on the generated fixture frame.
## These are real `assert` statements: as bare expressions only the last value in the
## cell is displayed, so a failing earlier check would pass unnoticed.

## assert dataframe
assert isinstance(test_df, pd.DataFrame), "generate_dataframe did not return a pandas DataFrame"
## assert length
assert test_df.shape[0] == 10, f"expected 10 rows, got {test_df.shape[0]}"
## assert width
assert test_df.shape[1] == 4, f"expected 4 columns, got {test_df.shape[1]}"
## assert dupes in col_1 (10 rows drawn from only 4 candidate values must repeat)
col_1_has_dupes = test_df['col_1'].drop_duplicates().shape[0] < test_df['col_1'].shape[0]
assert col_1_has_dupes, "expected repeated values in col_1"
## assert no dupes in col_2
## TODO: no check is implemented here. col_2 is not flagged unique in test_dict
## (col_4 is) -- confirm which column this check was meant to cover.

## Echo the final check so the cell still displays its result.
col_1_has_dupes



True