# Moroccan real estate market analysis

## 1. API Data fetching

In [1]:
import sys

# GraphQL query returning the details of each published listing ad:
# identifiers, category (with its parent), type, title, description, price,
# discount, primary/secondary parameters (text, numeric or boolean),
# seller type, location (city and area), listing time and two flags.
# The search filters and pagination are passed in through the `$query` variable.
GRAPHQL_API_DATA_QUERY = """
query getListingAds($query: ListingAdsSearchQuery!) {
  getListingAds(query: $query) {
    ads {
      details {
        ... on PublishedAd {
          adId
          listId
          category {
            id
            name
            parent {
              id
              name
            }
          }
          type {
            key
            name
          }
          title
          description
          price {
            withCurrency
            withoutCurrency
          }
          discount
          params {
		    primary {
			  ... on TextAdParam {
                id
                name
                textValue
                trackingValue
			  }
			  ... on NumericAdParam {
                id
                name
                numericValue
                unit
			  }
              ... on BooleanAdParam {
                id
                name
                booleanValue
              }
			},
            secondary {
              ... on TextAdParam {
                id
                name
                textValue
                trackingValue
              }
              ... on NumericAdParam {
                id
                name
                numericValue
                unit
              }
              ... on BooleanAdParam {
                id
                name
                booleanValue
              }
            }
          }
          sellerType
          location {
            city {
              id
              name
            }
            area {
              id
              name
            }
          }
          listTime
          isEcommerce
          isImmoneuf
        }
      }
    }
  }
}
"""

# GraphQL query that asks only for the total number of ads matching the
# `$query` variable, without requesting any ad details.
GRAPHQL_API_COUNT_QUERY = """
query getListingAds($query: ListingAdsSearchQuery!) {
  getListingAds(query: $query) {
    count {
      total
    }
  }
}
"""

def graphql_api_variables(page_offset=1, page_results_count=1):
    """Build the `variables` payload sent along with the listing-ads queries.

    Written as a regular function rather than a lambda bound to a name, so it
    shows up under its own name in tracebacks and can carry this docstring.

    Args:
        page_offset: Results page offset (starts from 1).
        page_results_count: Number of results per request (maxed at 1000).

    Returns:
        A freshly built dict holding the search filters (category 1200,
        type "SELL", ads that have a price >= 0 and an image) and the
        requested page number and size.
    """
    return {
        "query": {
            "filters": {
                "ad": {
                    "categoryId": 1200,
                    "type": "SELL",
                    "hasPrice": True,
                    "hasImage": True,
                    "price": {
                        "greaterThanOrEqual": 0
                    }
                }
            },
            "page": {
                "number": page_offset,  # Results page offset (Starts from 1)
                "size": page_results_count  # Number of results per request (Maxed at 1000)
            }
        }
    }

# GraphQL endpoint queried by this notebook.
GRAPHQL_API_URL = "https://gateway.avito.ma/graphql"

# HTTP headers sent with each request: a JSON content type and a
# desktop-browser User-Agent string (split over several literals for width).
GRAPHQL_API_HEADERS = {
    "Content-Type": "application/json",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
}


def fetch(url, headers, query, variables, timeout=30):
    """Send a GraphQL query as an HTTP POST and return the decoded JSON body.

    Args:
        url: Endpoint the request is posted to.
        headers: HTTP headers to send with the request.
        query: GraphQL query document.
        variables: Variables accompanying the query.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout unless one is passed, so without it a stalled
            connection would block the notebook indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within `timeout`.
    """
    payload = {"query": query, "variables": variables}
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    return response.json()

In [2]:
import math
import requests

# Number of ads requested per page (the API caps the page size at 1000).
PAGE_SIZE = 1000

# Ask for a single result first, only to read the total number of matching ads.
count_response = fetch(
    GRAPHQL_API_URL,
    GRAPHQL_API_HEADERS,
    GRAPHQL_API_COUNT_QUERY,
    graphql_api_variables()
)
results_count = int(count_response["data"]["getListingAds"]["count"]["total"])
pages_count = math.ceil(results_count / PAGE_SIZE)

raw_data = []
for page in range(1, pages_count + 1):
    # Always request a full page; the final page simply comes back shorter.
    # Computing a smaller size for the last page as
    # `results_count - page * PAGE_SIZE` yields zero or a negative number,
    # which is not a valid page size.
    # NOTE(review): presumably the API derives the offset from
    # number x size, so the size should stay constant anyway - confirm.
    current_page_response = fetch(
        GRAPHQL_API_URL,
        GRAPHQL_API_HEADERS,
        GRAPHQL_API_DATA_QUERY,
        graphql_api_variables(page, PAGE_SIZE)
    )

    raw_data.extend(current_page_response["data"]["getListingAds"]["ads"]["details"])

## 2. Convert JSON data to a pandas DataFrame

In [14]:
import pandas as pd

def _param_value(param):
    """Return the value carried by one ad parameter, whatever its kind.

    Text and numeric parameters are resolved first (a falsy text value falls
    through to the numeric one). Boolean parameters carry neither of those
    fields, so their `booleanValue` is used when nothing else was found;
    otherwise every boolean parameter would end up as None.
    """
    value = param.get("textValue") or param.get("numericValue")
    if value is None:
        value = param.get("booleanValue")
    return value


def flatten_ad(ad):
    """Flatten one raw ad (nested JSON dict) into a single-level row dict.

    Args:
        ad: One entry of the `details` list returned by the data query.

    Returns:
        A dict with the fixed ad fields, the area id/name when available, and
        one extra key per primary/secondary parameter (keyed by parameter id).
    """
    row = {
        "adId": ad["adId"],
        "listId": ad["listId"],
        "listTime": ad["listTime"],
        "title": ad["title"],
        "description": ad["description"],
        "priceStr": ad["price"]["withCurrency"],
        "price": ad["price"]["withoutCurrency"],
        "categoryId": ad["category"]["id"],
        "categoryName": ad["category"]["name"],
        "parentCategoryId": ad["category"]["parent"]["id"],
        "parentCategoryName": ad["category"]["parent"]["name"],
        "typeKey": ad["type"]["key"],
        "typeName": ad["type"]["name"],
        "locationCityId": ad["location"]["city"]["id"],
        "locationCityName": ad["location"]["city"]["name"],
        "sellerType": ad["sellerType"],
        "discount": ad["discount"],
        "isEcommerce": ad["isEcommerce"],
        "isImmoneuf": ad["isImmoneuf"],
    }

    # Include area if it's available
    area = ad["location"].get("area") or {}
    if area.get("id"):
        row["locationAreaId"] = area["id"]
    if area.get("name"):
        row["locationAreaName"] = area["name"]

    # Include the primary, then the secondary parameters if they are available
    # (a group may be missing or null; later ids overwrite earlier ones).
    for group in ("primary", "secondary"):
        for param in ad["params"].get(group) or []:
            row[param["id"]] = _param_value(param)

    return row


# Build all rows first and create the DataFrame once.
clean_data = [flatten_ad(ad) for ad in raw_data]

df = pd.DataFrame(clean_data)

## 3. Include city coordinates

In [5]:
from os import environ
from dotenv import load_dotenv
from time import sleep
from opencage.geocoder import OpenCageGeocode

# Load local .env file
load_dotenv()

# OpenCageData API rate limits :
# Daily limit of 2,500 requests/day, with a rate limit of 1 request/sec
OPENCAGE_API_KEY = environ.get("OPENCAGE_API_KEY")

geocoder = OpenCageGeocode(OPENCAGE_API_KEY)

coors = []
# One geocoding request per distinct (area, city) pair rather than per ad.
addresses = df[["locationAreaName", "locationCityName"]].drop_duplicates()

for i, (locationAreaName, locationCityName) in enumerate(addresses.values):
    query = f"{locationCityName}, Morocco"
    # Ads without an area hold NaN in this column, and NaN is truthy, so a
    # plain truthiness test would build the query "nan, <city>, Morocco".
    if pd.notna(locationAreaName) and locationAreaName:
        query = f"{locationAreaName}, " + query

    results = geocoder.geocode(query)
    # The geocoder may find nothing; keep the row with empty coordinates
    # instead of crashing and losing every address fetched so far.
    best_match = results[0] if results else None

    city_coors_row = {
        "locationCityName": locationCityName,
        # Stored unchanged (NaN included) so the later merge keys still match.
        "locationAreaName": locationAreaName,
        "latitude": best_match["geometry"]["lat"] if best_match else None,
        "longitude": best_match["geometry"]["lng"] if best_match else None
    }

    if best_match:
        components = best_match["components"]

        # Region and State District aren't guaranteed to always be returned!
        if components.get("region"):
            city_coors_row["locationRegionName"] = components["region"]

        if components.get("state_district"):
            city_coors_row["locationDistrictName"] = components["state_district"]

    coors.append(city_coors_row)

    # Progress logging
    sys.stdout.write(f"\rCompleted fetching data of {i + 1} out of {len(addresses)} entries")
    sys.stdout.flush()

    sleep(1.1)  # Following the API's rate limit of 1 request/sec

Completed fetching data of 1185 out of 1185 cities

In [15]:
# Turn the list of per-address geocoding records into a DataFrame so it can be joined onto the ads
coors = pd.DataFrame(coors)

# Left-join on (area, city): every ad row is kept and gains latitude/longitude,
# plus the region and district names for the addresses where the geocoder returned them
df = pd.merge(df, coors, how="left", on=["locationAreaName", "locationCityName"])

## 4. Save data to CSV files

In [16]:
# Show the merged DataFrame as the cell output for a last look before it is saved.
df

Unnamed: 0,adId,listId,listTime,title,description,priceStr,price,categoryId,categoryName,parentCategoryId,...,property_age,floor,rooms,bathrooms,size,floors,latitude,longitude,locationRegionName,locationDistrictName
0,74284719,55281576,2024-07-23T01:19:01Z,Appartement à vendre 83m abdelmoumen,Appartement à vendre 83m abdelmoumen proche d...,1100000 DH,1100000,1010,Appartements,1200,...,1-5 ans,5,2,2,,,33.570553,-7.625982,Casablanca-Settat,Prefecture of Casablanca
1,74284316,55281448,2024-07-23T00:16:22Z,appartement vendre,"Appt (142 m2 et 19 m2 garage) orienté sud, ens...",200000 DH,200000,1010,Appartements,1200,...,11-20 ans,4,2,2,161.0,,33.578000,-7.615000,,Prefecture of Casablanca
2,74284223,55281249,2024-07-22T22:45:04Z,Appartement à vendre 90 m² à Drargua,Appartement à vendre 2 facades titré lotisseme...,590000 DH,590000,1010,Appartements,1200,...,6-10 ans,1,2,2,102.0,,30.452900,-9.480820,,
3,74284144,55281235,2024-07-22T22:42:43Z,Appartement à vendre 59 m² à Casablanca,bartma f bouskoura 1 er etage f residence fer...,350000 DH,350000,1010,Appartements,1200,...,,1,2,1,59.0,,33.528612,-7.646462,,Prefecture of Casablanca
4,74284036,55281124,2024-07-22T22:00:42Z,appartement,سكن جميل في حي راقي قرب متجر بيم سعيد حجي,750000 DH,750000,1010,Appartements,1200,...,Neuf,4,2,2,81.0,,34.074674,-6.793431,,Prefecture of Salé
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45030,74180642,55201091,2024-07-07T12:36:14Z,Appartement à vendre 73 m² à Casablanca,شقة للبيع بمساحة 60 متر مربع مع مرأب للسيارة\n...,600000 DH,600000,1010,Appartements,1200,...,1-5 ans,3,2,1,73.0,,33.584383,-7.507205,,Prefecture of Casablanca
45031,74180561,55201086,2024-07-07T12:35:28Z,شقق للبيع بمسابح ومطلة على البحر بمدينة أصيلة,مدينة أصيلة، الواقعة على الساحل الشمالي الغربي...,9500 DH,9500,1010,Appartements,1200,...,1-5 ans,1,2,2,90.0,,35.465220,-6.034150,,
45032,74180569,55201060,2024-07-07T12:33:14Z,Appartement à vendre 74 m² à Bouznika,شقة للبيع بمدينة بوزنيقة بواجهتين\r\nعمارة مزو...,620000 DH,620000,1010,Appartements,1200,...,1-5 ans,4,2,1,74.0,,33.789420,-7.159680,,
45033,74180570,55201019,2024-07-07T12:27:24Z,appartement a vendre Ain sebaa,appartement a vendre Ain sebaa \n97m2 .\ngarag...,980000 DH,980000,1010,Appartements,1200,...,1-5 ans,3,2,2,97.0,,33.605994,-7.538794,,Prefecture of Casablanca


In [17]:
from pathlib import Path

# `to_csv` does not create missing parent directories, so make sure the
# output folder exists; otherwise the save fails only after all the
# fetching and geocoding has already been done.
OUTPUT_DIR = Path("data")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Ads enriched with coordinates, and the geocoded address table on its own.
df.to_csv(OUTPUT_DIR / "raw-fetched-data.csv", index=False)
coors.to_csv(OUTPUT_DIR / "city-data.csv", index=False)