# Generate transactions

In [None]:
import pandas as pd
import numpy as np

# Read every source table from the working directory in a single pass.
# The names on the left line up positionally with the CSV paths below,
# and the files are read in exactly that order.
(
    office_df,
    employee_df,
    owner_df,
    property_df,
    owns_df,
    client_df,
    prefers_df,
    appointment_df,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        './1.Office.csv',
        './2.Employee.csv',
        './5.Owner.csv',
        './6.Property.csv',
        './7.Owns.csv',
        './8.Client.csv',
        './10.Prefers.csv',
        './11.Appointment.csv',
    )
)

# Join every appointment to its property row (on PropertyID), then to the
# client row and the client's preference row(s) (both on ClientID).
# These are inner joins, so appointments without a match are dropped.
merged_df = (
    appointment_df
    .merge(property_df, on='PropertyID')
    .merge(client_df, on='ClientID')
    .merge(prefers_df, on='ClientID')
)

# Pick the price that applies to what the client wants: the sale price for a
# 'Purchase', otherwise the yearly lease price.  np.where evaluates this
# column-wise, which avoids one Python call per row and, unlike a row-wise
# apply, still yields a plain 1-D result when merged_df has no rows.
merged_df['Price'] = np.where(
    merged_df['DesiredTransactionType'] == 'Purchase',
    merged_df['SalePrice'],
    merged_df['LeasePricePerYear'],
)

# Keep only the appointments whose applicable price fits within the budget.
valid_transactions_df = merged_df[merged_df['Price'] <= merged_df['Budget']]

# Work on an independent copy.  valid_transactions_df was produced by
# filtering another frame, and adding columns to such a selection triggers
# pandas' SettingWithCopyWarning; copying first makes the writes unambiguous.
valid_transactions_df = valid_transactions_df.copy()

# Ensure 'VisitDate' is in datetime format so date arithmetic works below.
valid_transactions_df['VisitDate'] = pd.to_datetime(valid_transactions_df['VisitDate'])

# Generate a random transaction date strictly after the appointment:
# randint's upper bound is exclusive, so the offset is 1 to 364 days.
valid_transactions_df['TransactionDate'] = valid_transactions_df['VisitDate'] + pd.to_timedelta(
    np.random.randint(1, 365, size=len(valid_transactions_df)), unit='D'
)

# The transaction type is simply what the client asked for.
valid_transactions_df['TransactionType'] = valid_transactions_df['DesiredTransactionType']

# Split the single Price column into two mutually exclusive columns:
# PurchasePrice is filled only for 'Purchase' rows and LeasePricePerYear only
# for 'Rent' rows; the other one is NaN.  This overwrites the merged-in
# LeasePricePerYear column.
valid_transactions_df['PurchasePrice'] = np.where(
    valid_transactions_df['TransactionType'] == 'Purchase',
    valid_transactions_df['Price'],
    np.nan,
)
valid_transactions_df['LeasePricePerYear'] = np.where(
    valid_transactions_df['TransactionType'] == 'Rent',
    valid_transactions_df['Price'],
    np.nan,
)

# Generate a lease length of 1 to 5 years for 'Rent' rows; NaN for all others.
# A value is drawn for every row and then masked by np.where.
valid_transactions_df['LeaseYear'] = np.where(
    valid_transactions_df['TransactionType'] == 'Rent',
    np.random.randint(1, 6, size=len(valid_transactions_df)),
    np.nan,
)

# Keep a single row per (client, property) pair; the first occurrence wins.
# Some exceptions will be inserted manually later.
valid_transactions_df = valid_transactions_df.drop_duplicates(subset=['ClientID', 'PropertyID'])

# Split by transaction type.
is_purchase = valid_transactions_df['TransactionType'] == 'Purchase'
is_rent = valid_transactions_df['TransactionType'] == 'Rent'

# A property may be sold only once, so purchases are further de-duplicated on
# PropertyID (first occurrence kept).  Rentals are left as they are.
# Some exceptions will be inserted manually later.
sale_transactions = valid_transactions_df[is_purchase].drop_duplicates(subset=['PropertyID'])
rent_transactions = valid_transactions_df[is_rent]

# Stack sales and rentals, order them chronologically and renumber the rows.
transactions_df = (
    pd.concat([sale_transactions, rent_transactions])
    .sort_values(by='TransactionDate')
    .reset_index(drop=True)
)

# Sequential IDs in chronological order: T001, T002, ...
transactions_df['TransactionID'] = [
    f'T{seq:03d}' for seq in range(1, len(transactions_df) + 1)
]

# Discard VisitDate, then keep only the output columns with TransactionID first.
output_columns = [
    'TransactionID',
    'ClientID',
    'PropertyID',
    'AgentID',
    'TransactionDate',
    'TransactionType',
    'PurchasePrice',
    'LeasePricePerYear',
    'LeaseYear',
]
transactions_df = transactions_df.drop(columns=['VisitDate'])[output_columns]

# Update availability

In [None]:
# Any property that appears in at least one transaction is marked unavailable.
transacted_property_ids = transactions_df['PropertyID']
property_df.loc[
    property_df['PropertyID'].isin(transacted_property_ids),
    'Availability',
] = 'No'

# Persist the updated property table (without the index column).
property_df.to_csv('/mnt/data/updated_property.csv', index=False)

# Show the first rows of the updated property table.
property_df.head()