# Moroccan real estate market analysis

## 1. API data fetching

In [1]:
# GraphQL document for the `getListingAds` operation. It takes one variable,
# `$query` (declared as `ListingAdsSearchQuery!`), and selects the total result
# count plus, for every ad, the `PublishedAd` fields: identifiers, category
# (with its parent), type, title, description, price, discount, the secondary
# params (text / numeric / boolean variants), seller type, location
# (city, area, address), listing time and the shipping/e-commerce/immoneuf flags.
# NOTE: everything between the triple quotes is sent verbatim as the query.
GRAPHQL_API_DATA_QUERY = """
query getListingAds($query: ListingAdsSearchQuery!) {
  getListingAds(query: $query) {
    count {
      total
    }
    ads {
      details {
        ... on PublishedAd {
          adId
          listId
          category {
            id
            name
            parent {
              id
              name
            }
          }
          type {
            key
            name
          }
          title
          description
          price {
            withCurrency
            withoutCurrency
          }
          discount
          params {
            secondary {
              ... on TextAdParam {
                id
                name
                textValue
                trackingValue
              }
              ... on NumericAdParam {
                id
                name
                numericValue
                unit
              }
              ... on BooleanAdParam {
                id
                name
                booleanValue
              }
            }
          }
          sellerType
          location {
            city {
              id
              name
              __typename
            }
            area {
              id
              name
              __typename
            }
            address
            __typename
          }
          listTime
          offersShipping
          isEcommerce
          isImmoneuf
        }
      }
    }
  }
}
"""

# Reduced variant of the `getListingAds` query that selects only `count.total`.
# It accepts the same `$query` variable as the data query and is used to learn
# how many ads match the filters before computing the number of pages to fetch.
GRAPHQL_API_COUNT_QUERY = """
query getListingAds($query: ListingAdsSearchQuery!) {
  getListingAds(query: $query) {
    count {
      total
    }
  }
}
"""

def graphql_api_variables(page_offset=1, page_results_count=1):
    """Build the ``variables`` payload sent alongside the listing-ads queries.

    Args:
        page_offset: Results page number to request (pages start from 1).
        page_results_count: Number of results per request (maxed at 1000).

    Returns:
        A new dict on every call, shaped as ``{"query": {"filters": ...,
        "page": ...}}``. The filters are fixed: category 1200, ads of type
        ``SELL`` that have both a price and an image.
    """
    return {
        "query": {
            "filters": {
                "ad": {
                    # NOTE(review): 1200 is presumably the real-estate category id — confirm.
                    "categoryId": 1200,
                    "type": "SELL",
                    "hasPrice": True,
                    "hasImage": True,
                }
            },
            "page": {
                "number": page_offset,  # Results page offset (starts from 1)
                "size": page_results_count,  # Number of results per request (maxed at 1000)
            },
        }
    }

# Endpoint that receives every GraphQL request issued by this notebook.
GRAPHQL_API_URL = "https://gateway.avito.ma/graphql"

# Browser-like User-Agent, written as adjacent literals that Python joins into
# a single string at compile time.
_BROWSER_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36"
)

# HTTP headers attached to every request: JSON body plus the User-Agent above.
GRAPHQL_API_HEADERS = {
    "Content-Type": "application/json",
    "User-Agent": _BROWSER_USER_AGENT,
}


def fetch(url, headers, query, variables, timeout=30):
    """POST a GraphQL query and return the decoded JSON response.

    Args:
        url: GraphQL endpoint URL.
        headers: HTTP headers to send with the request.
        query: GraphQL query document (string).
        variables: Dict of GraphQL variables for the query.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook cell on a stalled connection.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # `requests` is looked up when the function is called, so it only needs to
    # be imported before the first call, not before this definition.
    response = requests.post(
        url,
        headers=headers,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

In [2]:
import math
import requests

# Number of ads requested per page (the API caps a page at 1000 results).
PAGE_SIZE = 1000

# Ask only for the total number of matching ads to know how many pages exist.
results_count = fetch(GRAPHQL_API_URL, GRAPHQL_API_HEADERS, GRAPHQL_API_COUNT_QUERY, graphql_api_variables())
results_count = int(results_count["data"]["getListingAds"]["count"]["total"])
pages_count = math.ceil(results_count / PAGE_SIZE)

raw_data = []
for page in range(1, pages_count + 1):
    # Every page, including the last one, is requested with the same size.
    # Shrinking the size on the final page is unnecessary, and computing it as
    # `results_count - page * 1000` yields zero or a negative number there.
    # The server is expected to return only the remaining ads — TODO confirm.
    current_page_response = fetch(
        GRAPHQL_API_URL,
        GRAPHQL_API_HEADERS,
        GRAPHQL_API_DATA_QUERY,
        graphql_api_variables(page, PAGE_SIZE)
    )

    # Accumulate the ad records of this page into one flat list.
    raw_data.extend(current_page_response["data"]["getListingAds"]["ads"]["details"])


## 2. Convert JSON data to Pandas DataFrame

In [3]:
import pandas as pd

# Output column -> chain of keys to follow inside an ad record. The order of
# this table is the order in which columns are inserted into each row.
_BASE_COLUMNS = (
    ("adId", ("adId",)),
    ("listId", ("listId",)),
    ("listTime", ("listTime",)),
    ("title", ("title",)),
    ("description", ("description",)),
    ("priceStr", ("price", "withCurrency")),
    ("price", ("price", "withoutCurrency")),
    ("categoryId", ("category", "id")),
    ("categoryName", ("category", "name")),
    ("parentCategoryId", ("category", "parent", "id")),
    ("parentCategoryName", ("category", "parent", "name")),
    ("typeKey", ("type", "key")),
    ("typeName", ("type", "name")),
    ("locationCityId", ("location", "city", "id")),
    ("locationCityName", ("location", "city", "name")),
    ("sellerType", ("sellerType",)),
    ("discount", ("discount",)),
    ("offersShipping", ("offersShipping",)),
    ("isEcommerce", ("isEcommerce",)),
    ("isImmoneuf", ("isImmoneuf",)),
)


def _dig(record, path):
    """Follow `path` key by key through nested mappings and return the leaf."""
    for key in path:
        record = record[key]
    return record


def _flatten_ad(ad):
    """Flatten one nested ad record into a single-level dict of columns.

    Mandatory fields come first; the area columns and the rooms / bathrooms /
    size columns are added only when the ad carries the corresponding data.
    """
    row = {column: _dig(ad, path) for column, path in _BASE_COLUMNS}

    # Area is optional, and so are its id and name.
    area = ad["location"].get("area")
    if area:
        for source_key, column in (("id", "locationAreaId"), ("name", "locationAreaName")):
            if area.get(source_key):
                row[column] = area[source_key]

    # Secondary params: index them by id, taking the text value when present
    # and falling back to the numeric value otherwise.
    ad_params = ad["params"]
    if "secondary" in ad_params and ad_params["secondary"]:
        values_by_id = {}
        for entry in ad_params["secondary"]:
            values_by_id[entry["id"]] = entry.get("textValue") or entry.get("numericValue")
        for column in ("rooms", "bathrooms", "size"):
            row[column] = values_by_id.get(column)

    return row


clean_data = []
for ad in raw_data:
    clean_data.append(_flatten_ad(ad))

# One row per ad; columns missing from a given row are left empty by pandas.
df = pd.DataFrame(clean_data)

## 3. Include city coordinates

In [4]:
from os import environ
from dotenv import load_dotenv
from time import sleep
from opencage.geocoder import OpenCageGeocode

# Load local .env file so the API key below can be read from the environment
load_dotenv()

# OpenCageData API rate limits :
# Daily limit of 2,500 requests/day, with a rate limit of 1 request/sec
OPENCAGE_API_KEY = environ.get("OPENCAGE_API_KEY")

geocoder = OpenCageGeocode(OPENCAGE_API_KEY)

coors = []
# Skip missing city names: geocoding them would only look up "nan, Morocco".
city_names = list(df['locationCityName'].dropna().unique())

for city_name in city_names:
    results = geocoder.geocode(f"{city_name}, Morocco")

    if results:
        # Read the coordinates by key instead of relying on the order in which
        # the geometry dict happens to list them.
        geometry = results[0]["geometry"]
        latitude, longitude = geometry["lat"], geometry["lng"]
    else:
        # No match for this city: keep the row with empty coordinates rather
        # than aborting the whole loop on a single unresolved name.
        latitude, longitude = None, None

    coors.append({
        "locationCityName": city_name,
        "latitude": latitude,
        "longitude": longitude
    })
    sleep(1.1)  # Following the API's rate limit of 1 request/sec

In [None]:
# Convert the list of city coordinates to a Pandas DataFrame for easy merging with original DataFrame.
# Naming the columns explicitly keeps the merge key present even when the list is empty,
# and makes this line safe to re-run when `coors` is already a DataFrame.
coors = pd.DataFrame(coors, columns=["locationCityName", "latitude", "longitude"])

# Merge the coordinates DataFrame with the original DataFrame adding 2 new columns (latitude, longitude).
# Coordinates left over from a previous run of this cell are dropped first, so re-running it
# does not produce suffixed duplicates (latitude_x / latitude_y, ...).
df = (
    df.drop(columns=["latitude", "longitude"], errors="ignore")
    .merge(coors, on="locationCityName", how="left")
)

## 4. Save Pandas DataFrame to CSV file

In [5]:
from pathlib import Path

# `to_csv` does not create missing parent directories, so make sure `data/`
# exists before writing (a fresh checkout may not contain it).
output_path = Path("data/raw-fetched-data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)

df.to_csv(output_path, index=False)