## Read data from PostgreSQL

In [1]:
# Import required libraries
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

# PostgreSQL connection configuration.
# Every value can be overridden through the standard libpq environment
# variables (PGHOST, PGPORT, PGDATABASE, PGUSER, PGPASSWORD) so that real
# credentials never have to be written into the notebook. The fallbacks are
# the previous hard-coded values and are only suitable for local development.
postgres_config = {
    "host": os.environ.get("PGHOST", "postgres"),  # Docker service name
    "port": os.environ.get("PGPORT", "5432"),
    "database": os.environ.get("PGDATABASE", "postgres"),
    "user": os.environ.get("PGUSER", "postgres"),
    "password": os.environ.get("PGPASSWORD", "postgres"),
}

# Create SQLAlchemy engine for pandas.
# User and password are percent-encoded: characters such as '@', ':' or '/'
# would otherwise be parsed as URL delimiters and corrupt the connection string.
engine = create_engine(
    f"postgresql://{quote_plus(postgres_config['user'])}:"
    f"{quote_plus(postgres_config['password'])}@"
    f"{postgres_config['host']}:{postgres_config['port']}/{postgres_config['database']}"
)

# Read data from PostgreSQL into pandas DataFrame
query = "SELECT * FROM fb_friend_requests"
table_df = pd.read_sql(query, engine)
print(table_df.head())

  user_id_sender user_id_receiver        date    action
0      ad4943sdz       948ksx123d  2020-01-04      sent
1      ad4943sdz       948ksx123d  2020-01-06  accepted
2     dfdfxf9483       9djjjd9283  2020-01-04      sent
3     dfdfxf9483       9djjjd9283  2020-01-15  accepted
4  ffdfff4234234      lpjzjdi4949  2020-01-06      sent


## Solution

In [2]:
# A friend request is identified by its (sender, receiver) pair, not by the
# sender alone: one sender may have requests open with several receivers, and
# their events interleave in time. Sorting and grouping on the pair keeps each
# request's events together so that "next action" never crosses into a
# different request.
request_keys = ['user_id_sender', 'user_id_receiver']

# Sort chronologically within each request, then attach the action that
# follows each row inside the same request (NaN when there is none).
# .assign builds a new frame, so re-running this cell is idempotent.
df_sorted = (
    table_df
    .sort_values(request_keys + ['date'])
    .reset_index(drop=True)
    .assign(next_action=lambda frame: frame.groupby(request_keys)['action'].shift(-1))
)

df_sorted

Unnamed: 0,user_id_sender,user_id_receiver,date,action,next_action
0,ad4943sdz,948ksx123d,2020-01-04,sent,accepted
1,ad4943sdz,948ksx123d,2020-01-06,accepted,
2,dfdfxf9483,9djjjd9283,2020-01-04,sent,accepted
3,dfdfxf9483,9djjjd9283,2020-01-15,accepted,
4,ffdfff4234234,lpjzjdi4949,2020-01-06,sent,
5,fffkfld9499,993lsldidif,2020-01-06,sent,accepted
6,fffkfld9499,993lsldidif,2020-01-10,accepted,
7,fg503kdsdd,ofp049dkd,2020-01-04,sent,accepted
8,fg503kdsdd,ofp049dkd,2020-01-10,accepted,
9,hh643dfert,847jfkf203,2020-01-04,sent,


In [3]:
# Keep only the rows that record a friend request being sent
is_sent = df_sorted['action'].eq('sent')
df_sent = df_sorted.loc[is_sent]
df_sent

Unnamed: 0,user_id_sender,user_id_receiver,date,action,next_action
0,ad4943sdz,948ksx123d,2020-01-04,sent,accepted
2,dfdfxf9483,9djjjd9283,2020-01-04,sent,accepted
4,ffdfff4234234,lpjzjdi4949,2020-01-06,sent,
5,fffkfld9499,993lsldidif,2020-01-06,sent,accepted
7,fg503kdsdd,ofp049dkd,2020-01-04,sent,accepted
9,hh643dfert,847jfkf203,2020-01-04,sent,
10,r4gfgf2344,234ddr4545,2020-01-06,sent,accepted


In [4]:
# Calculate acceptance percentage
total_sent_requests = len(df_sent)
print('total sent:', total_sent_requests)

# A sent request counts as accepted when the action that follows it is
# 'accepted'; rows with no following action (NaN) compare as False.
accepted_requests = int(df_sent['next_action'].eq('accepted').sum())
print('total accepted:', accepted_requests)

# With no sent requests the rate is undefined; report NaN instead of letting
# the division raise ZeroDivisionError.
if total_sent_requests:
    acceptance_rate = round((accepted_requests / total_sent_requests) * 100, 2)
else:
    acceptance_rate = float('nan')
print('acceptance percentage:', acceptance_rate)

total sent: 7
total accepted: 5
acceptance percentage: 71.43
