In [17]:
from google.cloud import storage
import pandas as pd

In [14]:
client = storage.Client.from_service_account_json(json_credentials_path='/Users/alexandergirardet/projects/estatewise/real_estate_analytics/development/real-estate-dev-key.json')
bucket_name = "rightmove_storage_dev"

blobs = client.list_blobs(bucket_name, prefix=f"rightmove/raw_data")

all_files = []

for blob in blobs:
    all_files.append(blob.name)

### Batch data for processing

In [22]:
from io import BytesIO

In [15]:
n = 5
batches = [
    all_files[i : i + n] for i in range(0, len(all_files), n)
]

In [20]:
files = batches[0]
bucket = client.bucket(bucket_name)

In [28]:
full_df = pd.DataFrame()
for file in files:
    blob = bucket.blob(file)
    df = pd.read_json(BytesIO(blob.download_as_string()), lines=True)
    # full_df = full_df.append(df)
    full_df = pd.concat([df, full_df], axis=0)

In [30]:
full_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 250 entries, 0 to 49
Data columns (total 51 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   id                           250 non-null    int64  
 1   bedrooms                     250 non-null    int64  
 2   bathrooms                    234 non-null    float64
 3   numberOfImages               250 non-null    int64  
 4   numberOfFloorplans           250 non-null    int64  
 5   numberOfVirtualTours         250 non-null    int64  
 6   summary                      250 non-null    object 
 7   displayAddress               250 non-null    object 
 8   countryCode                  250 non-null    object 
 9   location                     250 non-null    object 
 10  propertyImages               250 non-null    object 
 11  propertySubType              250 non-null    object 
 12  listingUpdate                250 non-null    object 
 13  premiumListing       

In [39]:
import jsonschema

In [None]:
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "email": {"type": "string", "format": "email"}
    },
    "required": ["name", "age"],
    "definitions": {
        "integer": {
            "type": "integer",
            "validator": validate_integer
        }
    }
}


In [41]:
json_dict = full_df.iloc[0].to_json()

In [43]:
import json

In [44]:
data = json.loads(json_dict)

# Create a Draft7Validator instance
validator = jsonschema.Draft7Validator(data)

# Extract the JSON schema
schema = validator.schema

# Print the extracted schema
print(json.dumps(schema, indent=2))

{
  "id": 131465186,
  "bedrooms": 1,
  "bathrooms": 1.0,
  "numberOfImages": 13,
  "numberOfFloorplans": 0,
  "numberOfVirtualTours": 0,
  "summary": "Peter David are pleased to offer this one bedroom property TO LET in the beautiful Luddenden Valley Luddenden is a conservation area surrounded by the Pennine Hills ideal for walkers Luddenden is served by a local bus route and has a strong community spirit with the Church and the friendly pub acting as focal points for many festivities throughout the year This property would ideally be suited to a single person or a couple Offered unfurnished the property comprises of a lounge, small kitchen, bathroom and one double bedroom The property also has gas central heating and double glazing though out Parking for one smallmedium sized car outside the property Other on street parking available on New Road We recommend an early viewing Pets will be considered Smoking outside pleaseBrochuresThorn View, LuddendenBrochure",
  "displayAddress": "Th

In [45]:
schema

{'id': 131465186,
 'bedrooms': 1,
 'bathrooms': 1.0,
 'numberOfImages': 13,
 'numberOfFloorplans': 0,
 'numberOfVirtualTours': 0,
 'summary': 'Peter David are pleased to offer this one bedroom property TO LET in the beautiful Luddenden Valley Luddenden is a conservation area surrounded by the Pennine Hills ideal for walkers Luddenden is served by a local bus route and has a strong community spirit with the Church and the friendly pub acting as focal points for many festivities throughout the year This property would ideally be suited to a single person or a couple Offered unfurnished the property comprises of a lounge, small kitchen, bathroom and one double bedroom The property also has gas central heating and double glazing though out Parking for one smallmedium sized car outside the property Other on street parking available on New Road We recommend an early viewing Pets will be considered Smoking outside pleaseBrochuresThorn View, LuddendenBrochure',
 'displayAddress': 'Thorn View, 