# Dummy Availability Table Creator

### Goal: Create a dummy availability table so I can execute the table matching and check the logic while I wait for Kevin to develop the actual logic

In [1]:
import pandas as pd
import numpy as np

In [4]:
#Read the combined reviews table; it is used below to see how many listings
#have 3 or more reviews left by the same reviewer
reviews_csv = "q6_comb_reviews.csv"
reviews = pd.read_csv(reviews_csv)
reviews.head()

Unnamed: 0.1,Unnamed: 0,listing_id,id,reviewer_id,review_month
0,0,12899,24767,69327,1
1,1,12899,29230,72846,3
2,2,12899,29806,84196,3
3,3,12899,32572,89114,3
4,4,12899,32862,100318,4


### Understanding the prevalence of thrice+ reviewed listings

In [21]:
#Count the reviews each reviewer left on each listing:
#one row per (listing_id, reviewer_id) pair, with the count in review_counts
grouped_listings = (
    reviews
    .groupby(['listing_id', 'reviewer_id'])['id']
    .count()
    .rename('review_counts')
    .reset_index()
)

In [22]:
#Keep the (listing, reviewer) pairs with 3 or more reviews (review_counts >= 3) - 7750 rows
has_three_plus = grouped_listings['review_counts'].ge(3)
grouped_listings.loc[has_three_plus]

Unnamed: 0,listing_id,reviewer_id,review_counts
231,3021,82971860,3
1635,9140,54090633,3
1969,12899,791723,3
1987,12899,1227853,5
2065,12899,3132153,3
...,...,...,...
2511389,789101243357990362,485134984,6
2511777,792942008294249400,485134984,4
2512605,798843846505276637,454135038,3
2512897,802203851341181214,20189866,3


In [26]:
#Pull every reviewer row for listing_id 12899 to use as a test table for my logic
is_test_listing = grouped_listings['listing_id'].eq(12899)
test_table = grouped_listings.loc[is_test_listing]
test_table

<class 'pandas.core.frame.DataFrame'>
      listing_id  reviewer_id  review_counts
1878       12899        12386              1
1879       12899        24799              1
1880       12899        26035              2
1881       12899        39329              1
1882       12899        49785              1
...          ...          ...            ...
2461       12899    423723480              1
2462       12899    431740869              1
2463       12899    431967278              1
2464       12899    434159024              1
2465       12899    454574110              1

[588 rows x 3 columns]


In [27]:
#In our test table, how many reviewers have 3 or more reviews (review_counts >= 3)? - 5
test_has_three_plus = test_table['review_counts'].ge(3)
test_table.loc[test_has_three_plus]

Unnamed: 0,listing_id,reviewer_id,review_counts
1969,12899,791723,3
1987,12899,1227853,5
2065,12899,3132153,3
2133,12899,7609608,5
2154,12899,9605958,3


### Creating a dummy availability table to test matching and logic

In [None]:
'''Goal:
    create a 2 column table with each listing id and its availability dates
Need to:
    get all the listing_ids
    make up some availability dates (make consistent for checking)
    combine into a dataframe'''

In [49]:
#Collect each distinct listing id that appears in the reviews table
listing_id_list = reviews['listing_id'].unique()

In [51]:
#Build the dummy table: one row per listing id, with every listing
#starting out on the same default availability months
dummy_avail = (
    pd.DataFrame({'listing_id': listing_id_list})
    .assign(avail_months='1,2,3')
)
dummy_avail.head()

Unnamed: 0,listing_id,avail_months
0,12899,"1,2,3"
1,789798,"1,2,3"
2,801318,"1,2,3"
3,808393,"1,2,3"
4,838961,"1,2,3"


In [52]:
#Sanity check: the dummy table should have exactly one row per unique listing id
n_dummy_rows = len(dummy_avail)
if n_dummy_rows != len(listing_id_list):
    print("Dummy avail table wrong length for unique number of ids")
else:
    print("Dummy avail table has correct number of rows", n_dummy_rows)

Dummy avail table has correct number of rows 45943


In [54]:
#Give two known listings distinctive availability months so the matching
#in the main query 6 script can be verified against them.
#Every other listing keeps the default 1,2,3.
month_overrides = {
    3021: '4,5,6',
    9140: '7,8,9',
}
for override_id, override_months in month_overrides.items():
    dummy_avail.loc[dummy_avail['listing_id'] == override_id, "avail_months"] = override_months

In [61]:
#Spot-check the two overridden listings plus two listings that should
#still carry the default months
for check_id in (3021, 9140, 12899, 801318):
    print(dummy_avail.loc[dummy_avail['listing_id'] == check_id])

      listing_id avail_months
3971        3021        4,5,6
      listing_id avail_months
4004        9140        7,8,9
   listing_id avail_months
0       12899        1,2,3
   listing_id avail_months
2      801318        1,2,3


In [62]:
#Saving the dummy availability table.
#index=False keeps the CSV to the two intended columns (listing_id, avail_months);
#without it the row index is written as an extra unnamed column, which comes
#back as "Unnamed: 0" when the file is re-read with pd.read_csv.
dummy_avail.to_csv("dummy_availability_table.csv", index=False)