# Import Required Libraries

In [15]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
from sqlalchemy import create_engine
import configparser

# Create Random Datasets for Creating DataFrame

In [16]:

# Define the number of months and the initial retention rate
num_months = 120
initial_retention_rate = 0.2  # Initial monthly retention rate

# Function to generate realistic monthly retention rates
def generate_retention_rates(num_months, initial_rate):
    months = list(range(1, num_months + 1))
    retention_rates = []
    for month in months:
        # Model logistic growth to gradually increase retention rates over time
        retention_rate = 1 / (1 + np.exp(-0.5 * (month - 6)))
        # Scale the retention rate based on the initial rate
        retention_rate = initial_rate + (1 - initial_rate) * retention_rate
        retention_rates.append(retention_rate)
    return retention_rates

# Generate realistic monthly retention rates
retention_rates = generate_retention_rates(num_months, initial_retention_rate)

# Generate random data for each column
data = {
    'record_date': [datetime.now().date() - timedelta(days=30*i) for i in range(num_months)],
    'beginning_users': [random.randint(1000, 5000) for _ in range(num_months)],
    'new_users': [random.randint(50, 500) for _ in range(num_months)],
    'lost_users': [random.randint(20, 200) for _ in range(num_months)],
    'logins': [random.randint(100, 1000) for _ in range(num_months)]
}

# Calculate end users
data['end_users'] = [beginning + new - lost for beginning, new, lost in zip(data['beginning_users'], data['new_users'], data['lost_users'])]

# Calculate active users using retention rates
data['active_users'] = [int(end_users * retention_rate) for end_users, retention_rate in zip(data['end_users'], retention_rates)]
data['monthly_retention'] = retention_rates
data['monthly_churn'] = 1 - np.array(retention_rates)

# Create the DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
display(df.head())

Unnamed: 0,record_date,beginning_users,new_users,lost_users,logins,end_users,active_users,monthly_retention,monthly_churn
0,2024-03-31,2067,363,109,573,2321,605,0.260687,0.739313
1,2024-03-01,4232,158,188,608,4202,1241,0.295362,0.704638
2,2024-01-31,1968,121,112,336,1977,683,0.34594,0.65406
3,2024-01-01,2846,480,57,292,3269,1357,0.415153,0.584847
4,2023-12-02,1842,120,20,811,1942,974,0.502033,0.497967


# Load Random Data to Postgresql Database

In [17]:
#Assign required credentials from con.txt file
config = configparser.ConfigParser()
config.read('.env')

#INPUT YOUR OWN CONNECTION STRING HERE
conn_string = config['DBCRED']['conn']

#perform to_sql test and print result
db = create_engine(conn_string)
conn = db.connect()
print(f"writing {len(df)} records")
df.to_sql('user_activity', con=conn, if_exists='replace', index=False)

writing 120 records


120