In [1]:
import pandas as pd
import numpy as np

In [3]:
# Show every column when a wide DataFrame is displayed (None disables the column cap).
pd.options.display.max_columns = None

In [85]:
# Read every source table from the working directory, in file-number order.
# The order of this list must match the order of the names unpacked below.
csv_paths = [
    './1.Office.csv',     # -> office_df
    './2.Employee.csv',   # -> employee_df
    './3.WorksAt.csv',    # -> works_at_df
    './4.Cost.csv',       # -> cost_df
    './5.Owner.csv',      # -> owner_df
    './6.Property.csv',   # -> property_df
    './7.Owns.csv',       # -> owns_df
    './8.Client.csv',     # -> client_df
    './10.Prefers.csv',   # -> prefers_df
]
(
    office_df,
    employee_df,
    works_at_df,
    cost_df,
    owner_df,
    property_df,
    owns_df,
    client_df,
    prefers_df,
) = map(pd.read_csv, csv_paths)

In [89]:
# Attach client details to each preference row, then pair every preference with
# all properties whose UnitType equals the client's PreferredPropertyType.
client_preferences = (
    prefers_df
    .merge(client_df, on="ClientID")
    .merge(property_df, left_on="PreferredPropertyType", right_on="UnitType")
)

# Keep only pairs where the listing matches what the client wants to do:
# buyers ("Purchase") see "Sale" listings, renters ("Rent") see "Rent" listings.
wants_purchase = client_preferences["DesiredTransactionType"].eq("Purchase")
wants_rent = client_preferences["DesiredTransactionType"].eq("Rent")
listed_for_sale = client_preferences["ListingType"].eq("Sale")
listed_for_rent = client_preferences["ListingType"].eq("Rent")

client_preferences = client_preferences[
    (wants_purchase & listed_for_sale) | (wants_rent & listed_for_rent)
]

client_preferences

Unnamed: 0,ClientID,PreferredPropertyType,Budget,FirstName,LastName,DesiredTransactionType,Phone,Email,AddressID,PropertyID,Availability,ListingType,UnitType,SalePrice,LeasePricePerYear,BuildingYear,DateAdded,Street,City,State,ZipCode,Country
0,1,3B2B,23917.0,Daniel,Lebedev,Rent,+7 638 6063 476,daniellebedev3283@yandex.ru,1,1,Yes,Rent,3B2B,,19355.0,1992,2020-11-27,277 Beech Avenue,New Haven,CT,39223.0,US
3,1,3B2B,23917.0,Daniel,Lebedev,Rent,+7 638 6063 476,daniellebedev3283@yandex.ru,1,26,Yes,Rent,3B2B,,23643.0,2016,2019-04-09,179 Hickory Avenue,New Haven,CT,94625.0,US
4,1,3B2B,23917.0,Daniel,Lebedev,Rent,+7 638 6063 476,daniellebedev3283@yandex.ru,1,43,Yes,Rent,3B2B,,18665.0,1988,2022-01-29,361 Mulberry Avenue,Waterbury,CT,72293.0,US
8,1,3B2B,23917.0,Daniel,Lebedev,Rent,+7 638 6063 476,daniellebedev3283@yandex.ru,1,67,No,Rent,3B2B,,20250.0,2008,2023-08-14,586 Basswood Avenue,Jersey City,NJ,47027.0,US
9,1,3B2B,23917.0,Daniel,Lebedev,Rent,+7 638 6063 476,daniellebedev3283@yandex.ru,1,87,No,Rent,3B2B,,20884.0,2003,2021-02-14,976 Walnut Crescent,Stamford,CT,53997.0,US
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33004,300,condo,15609.0,Linda,Lee,Rent,(669) 551-8254,lindalee1008@gmail.com,259,293,No,Rent,condo,,18627.0,1997,2019-08-15,975 Poplar Avenue,Camden,NJ,94639.0,US
33005,300,condo,15609.0,Linda,Lee,Rent,(669) 551-8254,lindalee1008@gmail.com,259,294,Yes,Rent,condo,,12093.0,2020,2023-07-15,583 Pine Plaza,Camden,NJ,11506.0,US
33006,300,condo,15609.0,Linda,Lee,Rent,(669) 551-8254,lindalee1008@gmail.com,259,296,Yes,Rent,condo,,18793.0,1978,2023-09-09,860 Sequoia Avenue,Newark,NJ,25475.0,US
33007,300,condo,15609.0,Linda,Lee,Rent,(669) 551-8254,lindalee1008@gmail.com,259,297,Yes,Rent,condo,,14844.0,2009,2023-01-16,410 Tulip Avenue,Bridgeport,CT,57630.0,US


In [99]:
# Drop listings that are not available *before* joining to offices: the join on
# "State" pairs each listing with every office in that state, so filtering first
# keeps the merge small and yields the same rows in the same order.
available_preferences = client_preferences[client_preferences["Availability"] == "Yes"]

# Pair each available client/property match with the offices located in the
# property's state. The merge result carries a fresh RangeIndex.
property_office_state = available_preferences.merge(office_df, on="State")

In [103]:
# Expand each client/property/office row to the employees assigned to that office.
property_office_agent = pd.merge(property_office_state, works_at_df, on="OfficeID")

# Bring in the employee attributes and keep only rows whose Role is 'Sales'.
property_office_agent_employee = (
    property_office_agent
    .merge(employee_df, on="EmployeeID")
    .loc[lambda merged: merged["Role"].eq("Sales")]
)

In [125]:
from datetime import timedelta

# Build 300 synthetic appointments from the eligible client/property/agent rows.
# Each appointment is made after the property's DateAdded, and the visit happens
# after the appointment is made.

# Dedicated seeded generator so the random day offsets are reproducible on
# Restart & Run All (the row sample below is already seeded via random_state).
rng = np.random.default_rng(42)

# Randomly select 300 rows from property_office_agent_employee
selected_properties = property_office_agent_employee.sample(n=300, random_state=42)
n_selected = len(selected_properties)

# AppointmentMadeDate: 1 to 30 days (both inclusive) after DateAdded.
# endpoint=True makes the upper bound inclusive.
days_until_booking = rng.integers(1, 30, size=n_selected, endpoint=True)
appointment_made_date = (
    pd.to_datetime(selected_properties["DateAdded"])
    + pd.to_timedelta(days_until_booking, unit="D")
)

# VisitDate: 1 to 15 days (both inclusive) after the appointment was made.
days_until_visit = rng.integers(1, 15, size=n_selected, endpoint=True)
visit_date = appointment_made_date + pd.to_timedelta(days_until_visit, unit="D")

# All columns share selected_properties' index, so they align row by row.
appointment_df = pd.DataFrame({
    "ClientID": selected_properties["ClientID"],
    "PropertyID": selected_properties["PropertyID"],
    "AgentID": selected_properties["EmployeeID"],
    "AppointmentMadeDate": appointment_made_date,
    "VisitDate": visit_date,
})

# Invariant of the generation scheme: every visit is strictly after its booking.
assert (appointment_df["VisitDate"] > appointment_df["AppointmentMadeDate"]).all()

# Sort the dataframe by AppointmentMadeDate
appointment_df = appointment_df.sort_values(by="AppointmentMadeDate").reset_index(drop=True)

# Assign custom AppointmentID (A001, A002, ...) following the sorted order
appointment_df["AppointmentID"] = [
    'A' + str(position + 1).zfill(3) for position in range(len(appointment_df))
]


In [135]:
# Put AppointmentID first. Selecting by name (rather than by position) keeps this
# cell idempotent: re-running it does not rotate the columns a second time.
appointment_df = appointment_df[
    ["AppointmentID", "ClientID", "PropertyID", "AgentID", "AppointmentMadeDate", "VisitDate"]
]

In [137]:
# Write the appointment table next to the input CSVs, without the DataFrame index.
appointment_df.to_csv(path_or_buf='./Appointment.csv', index=False)