In [91]:
import pandas as pd

from pathlib import Path
from tqdm.notebook import tqdm

# Identifying Listings that do not Conform with New RESO Standards

The purpose of this notebook is to identify which listings are missing required fields and what those fields are for each listing. In order to get the listings and the required fields, run the `listings.sql` query. This query is also located in [ReDash here](https://redash.mgmt.perchwell.com/queries/1036). After updating the `brokerage_id` in the query, run it and save it in the `data/` directory. Once this is done, update the `brokerage_id` in the cell below as well.

In [92]:
BROKERAGE_ID = [4885]

## Getting Required Fields from Requirements.csv
This first section reads in the requirements sheet, splits them by whether they are required or conditional fields, and provides all these fields to be pasted into the query if needed.

In [93]:
df = pd.read_csv('REBNY add_edit_ field requirements - Requirements.csv')
df.head(10)

Unnamed: 0,element_id,Virtual attribute type,Label,Required condition,Listing type
0,sale_or_rental,,Sale or rental,TRUE,"Rental, Sale"
1,lease_type,,Lease type,"{""and"": [\n {""=="" : [ { ""var"" : ""sale_or_rent...",Rental
2,building_id,,Address,TRUE,"Rental, Sale"
3,property_type__sales,"listing, property_type_code",Property type,TRUE,"Rental, Sale"
4,structure_type,"feature, structure_type",Structure type,TRUE,"Rental, Sale"
5,units_attributes,,Unit number,"{""some"": [\n [{""var"": ""property_type__sales""}...","Rental, Sale"
6,tax_block,"feature, tax_block",Tax block,TRUE,"Rental, Sale"
7,tax_lot,"feature, tax_lot",Tax lot,"\n {""!"": [\n [{""var"": ""property_type__sale...","Rental, Sale"
8,status_code,"listing, standard_status",Status,TRUE,"Rental, Sale"
9,contract_date,"listing, contract_date",Contract date,"{ ""or"" : [\n{""=="" : [ { ""var"" : ""status_code"" ...","Rental, Sale"


In [94]:
df_req = df[df['Required condition'] == 'TRUE']
df_cond = df[df['Required condition'] != 'TRUE']

In [95]:
len(df_req), len(df_cond)

(27, 62)

In [96]:
query = '''select listings.id, 
listings.brokerage_id, 
reso_reso_properties.listing_id, 
reso_reso_properties.unparsed_address,
reso_reso_properties.unit_number,
reso_reso_properties.list_agent_full_name,
reso_reso_properties.co_list_agent_full_name,
'''

In [97]:
query += '-- Required Fields\n'
for var, var1 in zip(df_req['element_id'], df_req['Virtual attribute type']):
    if pd.isna(var1):
        query += f'{var},\n'
    else:
        tmp = var1.split(',')
        query += f'{tmp[0]}s.{tmp[1].strip()},\n'
query += '-- Conditional Fields\n'
for var, var1 in zip(df_cond['element_id'], df_cond['Virtual attribute type']):
    if pd.isna(var1):
        query += f'{var},\n'
    else:
        tmp = var1.split(',')
        query += f'{tmp[0]}s.{tmp[1].strip()},\n'
print(query)

select listings.id, 
listings.brokerage_id, 
reso_reso_properties.listing_id, 
reso_reso_properties.unparsed_address,
reso_reso_properties.unit_number,
reso_reso_properties.list_agent_full_name,
reso_reso_properties.co_list_agent_full_name,
-- Required Fields
sale_or_rental,
building_id,
listings.property_type_code,
features.structure_type,
features.tax_block,
listings.standard_status,
list_date,
expiration,
lease_term_min_months,
listings.concessions_given,
listing_agreement,
listings.cobroke_agreement,
propertiess.agent_id,
office_id,
listings.buyer_brokerage_compensation_type,
description,
showing_instructions,
published,
features.year_built,
amenitys.garage,
features.elevators_total,
attendance_type ,
features.stories_total,
features.number_of_units_total,
features.laundry_features,
features.bedrooms,
rooms,
-- Conditional Fields
lease_type,
units_attributes,
features.tax_lot,
listings.contract_date,
sale_date,
sale_price,
listingss.buyer_agent_id,
listings.cancellation_date,
furni

In [98]:
switch = False
elements = []
for y in df_cond['Required condition']:
    for x in y.split(':'):
        if switch:
            elements.append(x[x.find('"')+1:x[x.find('"')+1:].find('"')+2])
            switch = False
        if 'var' in x:
            switch = True

In [99]:
set(elements)

{'',
 'basement_y_n',
 'bonus_y_n',
 'buyer_brokerage_compensation_type',
 'commission_type',
 'concessions_given',
 'configuration',
 'cooling_y_n',
 'flip_tax_amount',
 'flip_tax_pct',
 'flip_tax_type__form_sections',
 'furnished',
 'garage_spaces_assigned_y_n',
 'heating_y_n',
 'listing_agreement',
 'new_construction_y_n',
 'new_development_y_n',
 'owner_pays_plus_concessions_y_n',
 'pet_policy',
 'property_type__sales',
 'published',
 'rls',
 'sale_or_rental',
 'showing_start_time',
 'status_code',
 'tax_abatement_y_n__form_sections',
 'total_fireplaces',
 'view_y_n'}

In [100]:
df_req

Unnamed: 0,element_id,Virtual attribute type,Label,Required condition,Listing type
0,sale_or_rental,,Sale or rental,True,"Rental, Sale"
2,building_id,,Address,True,"Rental, Sale"
3,property_type__sales,"listing, property_type_code",Property type,True,"Rental, Sale"
4,structure_type,"feature, structure_type",Structure type,True,"Rental, Sale"
6,tax_block,"feature, tax_block",Tax block,True,"Rental, Sale"
8,status_code,"listing, standard_status",Status,True,"Rental, Sale"
16,list_date,,List date,True,"Rental, Sale"
21,expiration,,Expiration date,True,"Rental, Sale"
22,lease_term_min_months,,Minimum lease months,True,Rental
38,concessions_given,"listing, concessions_given",Concessions,True,"Rental, Sale"


In [101]:
def get_name(element_id, virtual_id):
    if pd.isna(virtual_id):
        return element_id
    else:
        return virtual_id.split(',')[1].strip()

In [102]:
df_req['field_name'] = [get_name(x,y) for x,y in zip(df_req['element_id'], df_req['Virtual attribute type'])]
df_cond['field_name'] = [get_name(x,y) for x,y in zip(df_cond['element_id'], df_cond['Virtual attribute type'])]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_req['field_name'] = [get_name(x,y) for x,y in zip(df_req['element_id'], df_req['Virtual attribute type'])]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cond['field_name'] = [get_name(x,y) for x,y in zip(df_cond['element_id'], df_cond['Virtual attribute type'])]


## Processing Listings
This section processes the listings from `listings.sql` and identifies whether they satisify the requirements or not.

In [103]:
listings_df = pd.read_csv('data/RESO_Required_Fields_-_Active,_In_Contract,_Pending_Listings_2023_11_13.csv')
# listings_df = pd.read_csv('data/all_ny_listings_231110.csv')

In [104]:
keys = []
for index, row in tqdm(listings_df.iterrows(), total=len(listings_df)):
    key = ''
    for i, r in df_req.iterrows():
        # checking required fields
        if pd.isna(row[r['field_name'].strip()]) and row['sale_or_rental'] in r['Listing type']:
            key += r['Label']
            key += '|'
    
    # checking conditional requirements
    if row.sale_or_rental == 'R' and row.property_type_code == 400 and pd.isna(row.lease_type):
        key += 'Lease type|'
    #elif row.property_type_code in [400, 530, 540, 550, 570, 580] and pd.isna(row.units_attributes):
     #   key += 'units_attributes|'
    if row.property_type_code not in [100, 400] and pd.isna(row.tax_lot):
        key += 'Tax Lot|'
    if row.standard_status in ["Pending", "Sold"] and pd.isna(row.contract_date):
        key += 'Contract Date|'
    if row.standard_status == "Sold" and pd.isna(row.close_date):
        key += 'Close Date|'
    if row.standard_status == "Sold" and pd.isna(row.close_price):
        key += 'Close Price|'
    if row.standard_status == "Sold" and pd.isna(row.buyer_agent_id):
        key += 'Buyer agents|'
    if row.standard_status == "Cancelled" and pd.isna(row.cancellation_date):
        key += 'Cancellation Date|'
    if row.sale_or_rental == 'R' and row.property_type_code not in [530,"530",540,"540",550,"550",560,"560",580,"580",570,"570",590,"590"] and pd.isna(row.furnished):
        key += 'furnished'
    if row.sale_or_rental == 'S' and row.property_type_code in [530, 540, 550] and pd.isna(row.special_listing_conditions):
        key += 'Special listing conditions|'
    if pd.isna(row.list_price):
        key += 'List price|'
    #elif row.sale_or_rental == 'R' and pd.isna(row.rental_price):
     #   key += 'rental_price|'
    if row.sale_or_rental == 'R' and row.furnished in ['furnished', 'partially', 'negotiable'] and pd.isna(row.furnished_rent):
        key += 'Furnished rental price|'
    if row.sale_or_rental == 'R' and row.property_type_code in [530,"530",540,"540",550,"550",560,"560",580,"580",570,"570",590,"590"] and pd.isna(row.available_date):
        key += 'Availability date|'
    if row.furnished in ['Yes', 'Optional'] and row.property_type_code not in [100, 400] and row.sale_or_rental == 'R' and pd.isna(row.lease_term_max_months):
        key += 'Maximum lease months|'
    if row.property_type_code in [530, "530",  540, "540", 550, "550"] and row.sale_or_rental == 'S' and pd.isna(row.max_financing_pct):
        key += 'Max financing (%)|'
    if row.property_type_code in [530, "530",  540, "540", 550, "550"] and row.sale_or_rental == 'S' and pd.isna(row.maximum_financing_remarks):
        key += 'Max financing remarks|'
    if row.property_type_code in [530, "530", 550, "550"] and row.sale_or_rental == 'S' and pd.isna(row.number_of_shares):
        key += 'Number of shares|'
    if row.property_type_code in [540, "540"] and row.sale_or_rental == 'S' and pd.isna(row.percent_of_common_elements):
        key += 'Percent of common elements|'
    if row.property_type_code in [530, "530",  540, "540", 550, "550"] and row.sale_or_rental == 'S' and pd.isna(row.hoa_fee):
        key += 'Monthlies/recurring fees|'
    if row.property_type_code in [530, "530",  540, "540", 550, "550"] and row.sale_or_rental == 'S' and pd.isna(row.association_fee_frequency):
        key += 'Fee frequency|'
    if row.property_type_code == 540 and row.sale_or_rental == 'S' and pd.isna(row.real_estate_tax):
        key += 'Fee frequency|'
    if row.property_type_code in [530, "530",  540, "540", 550, "550"] and row.sale_or_rental == 'S' and (pd.isna(row.flip_tax_pct) and pd.isna(row.flip_tax_amount)):
        key += 'Flip tax amount ($)|Flip tax (%)|'
    if row.flip_tax_amount > 0 or row.flip_tax_pct > 0 and pd.isna(row.flip_tax_description):
        key += 'Flip tax remarks|'
    if row.concessions_given == 'yes' and pd.isna(row.concessions_comments):
        key += 'Concessions comments|'
    if row.concessions_given == 'yes' and row.commission_type == 'Owner Pays' and row.sale_or_rental == 'R' and pd.isna(row.owner_pays_plus_concessions_y_n):
        key += 'Owner pays commission + concessions?|'
    if (row.owner_pays_plus_concessions_y_n == True or row.commission_type == 'Owner Pays') and row.sale_or_rental == 'R' and pd.isna(row.owner_pays):
        key += 'Owner pays|'
    if (row.owner_pays_plus_concessions_y_n == True or row.commission_type == 'Owner Pays') and row.sale_or_rental == 'R' and pd.isna(row.owner_pays_remarks):
        key += 'Owner pays remarks|'
    if row.sale_or_rental == 'R' and row.property_type_code != 100 and pd.isna(row.commission_type):
        key += 'Rental compensation type|'
    #elif row.buyer_brokerage_compensation_type == 'percent' and pd.isna(row.commission_amount_percentage):
     #   key += 'commission_amount_percentage|'
    #elif row.buyer_brokerage_compensation_type == 'dollar' and pd.isna(row.commission_amount_dollar):
     #   key += 'commission_amount_dollar|'
    if row.commission_type == 'Co-broke' and pd.isna(row.commission_remarks):
        key += 'Commission remarks|'
    if row.bonus_y_n == True and row.sale_or_rental == 'S' and pd.isna(row.bonus):
        key += 'Bonus remarks|'
    if (not pd.isna(row.showing_start_time)) and pd.isna(row.showing_end_time):
        key += 'End time|'
    if row.published == True and row.property_type_code != 100 and pd.isna(row.rls):
        key += 'rls|'
    if row.property_type_code not in [400, 100] and pd.isna(row.overall_condition):
        key += 'Property condition|'
    if row.new_development_y_n == True or row.new_construction_y_n == True and pd.isna(row.sponsor_unit_y_n):
        key += 'Resale/sponsor|'
    if row.property_type_code in [560, 580, 570, 590] and pd.isna(row.lot_size_area):
        key += 'Lot size area|'
    if row.property_type_code in [560, 580, 570, 590] and pd.isna(row.lot_size_units):
        key += 'Lot size units|'
    if row.property_type_code in [560, 580, 570, 590] and pd.isna(row.lot_size_dimensions):
        key += 'Lot size dimensions|'
    if row.property_type_code in [560, 580, 570, 590] and pd.isna(row.building_size_dimensions):
        key += 'Building dimensions|'
    if (row.property_type_code in [560, "560",  570, "570",  580, "580",   590, "590"] or row.configuration_type in ['duplex', 'triplex']) and pd.isna(row.building_area_total):
        key += 'Building area total|'
    if (row.property_type_code in [560, "560",  570, "570",  580, "580",   590, "590"] or row.configuration_type in ['duplex', 'triplex']) and pd.isna(row.building_area_units):
        key += 'Building area units|'
    if row.property_type_code == 540 and pd.isna(row.lot_size_square_feet):
        key += 'Living area (sq ft)|'
    if row.garage == True and pd.isna(row.garage_spaces):
        key += 'Number of assigned spaces|'
    if row.property_type_code != 100 and pd.isna(row.pet_policy):
        key += 'Pets allowed|'
    if ('unit_size_limit' in str(row.pet_policy) or 'unit_number_limit' in str(row.pet_policy) or 'building_size_limit' in str(row.pet_policy) or 'building_number_limit' in str(row.pet_policy) or 'breed_restrictions' in str(row.pet_policy)) and pd.isna(row.pet_policy_remarks):
        key += 'Pets allowed comments|'
    if row.basement_y_n ==  True and pd.isna(row.basement_type):
        key += 'Basement|'
    if row.heating_y_n ==  True and pd.isna(row.heating):
        key += 'Heating features|'
    if row.cooling_y_n ==  True and pd.isna(row.cooling):
        key += 'Cooling features|'
    if row.view_y_n == True and pd.isna(row.view):
        key += 'view|'
    if row.total_fireplaces > 0 and pd.isna(row.fireplace_features):
        key += 'Fireplace features|'
    if (row.property_type_code != 100 or row.property_type_code != 400) and pd.isna(row.bathrooms):
        key += 'Full bathrooms|'
    if (row.property_type_code != 100 or row.property_type_code != 400) and pd.isna(row.half_bathrooms):
        key += 'Half bathrooms|'
    if row.rls == True and pd.isna(row.rls_syndication_portals):
        key += 'REBNY syndication portals|'
    keys.append(key)

  0%|          | 0/2479 [00:00<?, ?it/s]

In [105]:
listings_df['keys'] = keys

## Filtering to just the listings that have missing fields

In [106]:
l = listings_df[listings_df['keys'] != ''][['brokerage_id',	'listing_id','unparsed_address', 'unit_number',	'list_agent_full_name',	'co_list_agent_full_name',	'sale_or_rental',	'standard_status',	'list_date','expiration','bedrooms', 'rooms','contract_date','sale_date','sale_price', 'cancellation_date', 'list_price', 'keys']]
print(len(l), len(listings_df))

2025 2479


In [107]:
for brokerage_id in BROKERAGE_ID:
    agents = set(l[l['brokerage_id'] == brokerage_id]['list_agent_full_name'])
    Path(f'data/{brokerage_id}').mkdir(parents=True, exist_ok=True)
    l[(l['list_agent_full_name'].isin(agents)) | (l['co_list_agent_full_name'].isin(agents))].to_excel(f'data/{brokerage_id}/master_missing_fields.xlsx')

    for a in agents:
        l[(l['list_agent_full_name'] == a) | (l['co_list_agent_full_name'] == a)].to_csv(f'data/{brokerage_id}/{a}.csv', index=False)

In [108]:
agents

{'532 W 20th Sales Office',
 '561 Pacific Street Sales Office',
 'Abigail Palanca',
 'Alen Moshkovich',
 'Alix Trachtman',
 'Amy Herman',
 'Amy Wang',
 'Andrew Appell',
 'Andrew Arrigo',
 'Andrew Blee',
 'Andrew Klima',
 'Andrew Mak',
 'Andy Klaric',
 'Annette Akers',
 'Anya Turcheck',
 'Ariel Mahgerefteh',
 'Ariel Sassoon',
 'Ashlei De Souza',
 'Ashley Brooke',
 'Batya Rivkin',
 'Bernadette Brennan',
 "Bianca D'Alesandro",
 'Brad Mohr',
 'Brandon Bogard',
 'Brian Chan',
 'Brianah Moore',
 'Brittney Haynes',
 'Carlos Simoes',
 'Casey Joe',
 'Celine Coudert',
 'Chase Landow',
 'Craig Beem',
 'Daniel Rosenstein',
 'David Barkstedt',
 'David Miranda',
 'Dawn Watson',
 'Donna Strugatz',
 'Edgar Iglesias',
 'Elaine Dealy',
 'Emanuele Fiore',
 'Emily Yaffe',
 'Evelyn Robleto',
 'Evelyn Torres',
 'Extell Marketing Group & SERHANT. (Sales Office)',
 'Gabrielle Gilbert',
 'Georgios Gerontidis',
 'Greg Vladi',
 'Irina Balueva',
 'Issy Leiter',
 'Jack Richardson',
 'Jade Shenker',
 'Jamin Lin',
 

In [109]:
listings_df.head()

Unnamed: 0,id,brokerage_id,listing_id,unparsed_address,unit_number,list_agent_full_name,co_list_agent_full_name,agent_id,sale_or_rental,building_id,...,cooling_y_n,garage_spaces_assigned_y_n,new_construction_y_n,new_development_y_n,heating_y_n,showing_start_time,total_fireplaces,rls,rls_syndication_portals,keys
0,7716976,2162,PRCH-7716976,220 North 6th Street,1,Macie Barnes,,94920,S,140655,...,,False,,True,,,,True,True,Elevators total|Laundry features|Max financing...
1,7738458,1824,PRCH-7738458,35 West 15th Street,13A,Lisa Larson,Angela Wu,113981,S,11219,...,,False,,False,,,,True,True,Elevators total|Max financing (%)|Flip tax amo...
2,7648466,25228,PRCH-7648466,61 West 62nd Street,16MN,Ethan Leifer,Jackie Fogerty,254774,S,27521,...,,False,,False,,,,True,True,
3,7750166,1210,PRCH-7750166,410 West 53rd Street,528,Orhan Chapkanov,,93257,R,11837,...,,False,,False,True,,,True,True,Owner pays remarks|
4,7556323,1824,PRCH-7556323,467 Central Park West,16B,Robin L. Rothman,,112714,S,35432,...,,False,,False,,,,True,True,Max financing remarks|


In [110]:
all_keys = []
for k in l['keys']:
    all_keys += (k.split('|'))

In [111]:
all_keys = set(all_keys)

In [112]:
stats = {'missing_field': [], 'num_listings': []}
for k in all_keys:
    count = 0
    for i in l['keys']:
        if k in i:
            count += 1
    stats['missing_field'].append(k)
    stats['num_listings'].append(count)

In [113]:
stats

{'missing_field': ['',
  'Half bathrooms',
  'Living area (sq ft)',
  'Fee frequency',
  'Showing instructions',
  'Lot size area',
  'Year built',
  'Number of shares',
  'Listing office',
  'Max financing (%)',
  'Tax Lot',
  'Pets allowed',
  'Flip tax (%)',
  'Contract Date',
  'Owner pays',
  'Tax block',
  'Resale/sponsor',
  'Flip tax remarks',
  'Lease type',
  'Percent of common elements',
  'Owner pays remarks',
  'Structure type',
  'Monthlies/recurring fees',
  'Elevators total',
  'Minimum lease months',
  'Garage',
  'Max financing remarks',
  'Full bathrooms',
  'Special listing conditions',
  'Listing agreement',
  'Laundry features',
  'Building area total',
  'Lot size units',
  'Lot size dimensions',
  'Number of assigned spaces',
  'Number of units total',
  'rls',
  'Co-brokerage agreement',
  'Property condition',
  'Building area units',
  'Maximum lease months',
  'Flip tax amount ($)',
  'Concessions',
  'Buyer agency compensation type',
  'Total bedrooms',
  '

In [114]:
stats = pd.DataFrame(stats)
stats.head()

Unnamed: 0,missing_field,num_listings
0,,2025
1,Half bathrooms,2
2,Living area (sq ft),37
3,Fee frequency,203
4,Showing instructions,7


In [115]:
stats.to_csv('data/stats.csv', index=False)

In [116]:
listings_df[listings_df['keys'] != ''].to_csv('data/master_all_fields.csv')

In [117]:
'''for brokerage_id in listings_df['brokerage_id'].unique():
    # agents = l[l['brokerage_id'] == brokerage_id]['list_agent_full_name'].unique()
    #Path(f'data/{brokerage_id}').mkdir(parents=True, exist_ok=True)
    l[l['brokerage_id'] == brokerage_id].to_excel(f'data/{brokerage_id}_master_missing_fields.xlsx', index=False)

    for a in agents:
        l[(l['list_agent_full_name'] == a) | (l['co_list_agent_full_name'] == a)].to_csv(f'data/{brokerage_id}/{a}.csv', index=False)'''
    

"for brokerage_id in listings_df['brokerage_id'].unique():\n    # agents = l[l['brokerage_id'] == brokerage_id]['list_agent_full_name'].unique()\n    #Path(f'data/{brokerage_id}').mkdir(parents=True, exist_ok=True)\n    l[l['brokerage_id'] == brokerage_id].to_excel(f'data/{brokerage_id}_master_missing_fields.xlsx', index=False)\n\n    for a in agents:\n        l[(l['list_agent_full_name'] == a) | (l['co_list_agent_full_name'] == a)].to_csv(f'data/{brokerage_id}/{a}.csv', index=False)"

In [118]:
b_mapping = pd.read_csv('data/brokerage_mapping.csv')
b_mapping.columns = ['brokerage_id', 'brokerage_name']

In [119]:
l = l.merge(b_mapping, how='left', on='brokerage_id')


In [120]:
print(len(l))
l.drop_duplicates(subset='listing_id', inplace=True)
print(len(l))

2025
2024


In [121]:
stats = {'missing_field': [], 'num_listings': []}
for k in all_keys:
    count = 0
    for i in l['keys']:
        if k in i:
            count += 1
    stats['missing_field'].append(k)
    stats['num_listings'].append(count)

In [122]:
def report_writer(dfs:dict, report_name:str):
    """Creates an excel report using a dictionary of dfs.

    Args:
        dfs (dict): Data
        report_name (str): Name of file.
    """
    writer = pd.ExcelWriter(report_name, engine='xlsxwriter')
    workbook = writer.book
    format = workbook.add_format()
    format.set_align('center')
    for sheetname, df in dfs.items():
        df.to_excel(writer, sheet_name=sheetname, index=False)
        worksheet = writer.sheets[sheetname]
        for idx, col in enumerate(df):
            series =df[col]
            max_len = max((
                series.astype(str).map(len).max(),
                len(str(series.name))
            ))+1
            worksheet.set_column(idx, idx, max_len, format)

    writer.close()

In [123]:
for b in l['brokerage_name'].unique():
    mydict = {}
    mydict['all_listings'] = l[l['brokerage_name'] == b]
    stats = {'missing_field': [], 'num_listings': []}
    for k in all_keys:
        count = 0
        for i in l[l['brokerage_name'] == b]['keys']:
            if k in i:
                count += 1
        stats['missing_field'].append(k)
        stats['num_listings'].append(count)
    mydict['summary_stats'] = pd.DataFrame(stats)
    if '/' in str(b):
        b = str(b).replace('/', '')
        print(b)
    try:
        report_writer(mydict, f'data/out/{b}_master_all_fields.xlsx')
    except OSError:
        print(b)
        report_writer(mydict, 'data/out/remax_master_all_fields.xlsx')

In [124]:
report_writer(mydict, 'data/master_all_fields1.xlsx')