In [1]:
# Required for configuration (reading credentials from the environment)
import os

# Required for API calls
import json
import requests
import itertools
import googlemaps

# Required for data preprocessing
import numpy as np
import pandas as pd
from tqdm import tqdm
import geopandas as gpd

# Required for training an AI model
from shapely.geometry import Point, LineString
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score,classification_report

In [None]:
#_______________________________________________
# Credentials are read from the environment when the variables are set, so real
# keys never have to be pasted into (and saved with) the notebook. The original
# placeholder strings are kept as fallbacks; replace them only if you cannot
# export GOOGLE_MAPS_API_KEY / OPENAI_API_KEY before starting the kernel.
key = os.environ.get("GOOGLE_MAPS_API_KEY", "GOOGLE MAPS API KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", 'OPEN AI API KEY')

# Paths to your shapefiles
road_shapefile_path = 'ABSOLUTE PATH TO THE ROAD NETWORK FILE'
polygon_shapefile_path = 'ABSOLUTE PATH TO SHAPE FILES'
#_______________________________________________

In [2]:
# Fixed random seed for reproducible output. It is passed as `random_state`
# to both train_test_split and RandomForestClassifier in the cells below.
seed = 1008781695

In [3]:
# Load the labelled review dataset; later cells read its "Comment" column as the
# input text and its "Sentiment" column as the target label.
# NOTE(review): the path is hard-coded under /content/drive — presumably a Colab
# Google Drive mount; it must be adjusted to run anywhere else.
dataset = pd.read_csv("/content/drive/MyDrive/sentiment_yelp/customer_reviews_binary_21000.csv")

In [4]:
# Split review text ("Comment") and labels ("Sentiment") into a 70% training
# portion (train_size=0.7) and a held-out remainder; seeded so the split is
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(dataset["Comment"], dataset["Sentiment"], train_size=0.7, random_state=seed)

In [5]:
# Since the model takes numeric inputs only, every review string has to be
# vectorized. The CountVectorizer is fitted on the training split only and then
# reused, unchanged, to transform the test split (and, later, the fetched reviews).
vectorizer = CountVectorizer()
x_train_vec = vectorizer.fit_transform(x_train)
x_test_vec = vectorizer.transform(x_test)

In [6]:
# Create a random-forest classifier, train it on the vectorized training split,
# and report its accuracy on the held-out test split.
# random_state=seed keeps the fitted forest reproducible; n_jobs=-1 is passed so
# scikit-learn may parallelize fitting and prediction.
rf_clf1 = RandomForestClassifier(n_estimators = 10,
                                max_depth = 6,
                                min_samples_leaf = 5,
                                min_samples_split = 8,
                                random_state = seed,
                                n_jobs = -1)
rf_clf1.fit(x_train_vec, y_train)
# NOTE(review): the same test-set prediction is computed again as `y_pred` in
# the following evaluation cell.
pred = rf_clf1.predict(x_test_vec)
print('Prediction Accuracy: {:.4f}'.format(accuracy_score(y_test,pred)))

Prediction Accuracy: 0.9637


In [7]:
# Check the classifier's performance on the held-out test split: overall
# accuracy plus the per-class precision / recall / f1 report from scikit-learn.
y_pred = rf_clf1.predict(x_test_vec)
accuracy = accuracy_score(y_test,y_pred)
print("accuracy",accuracy)
# `Report` holds the formatted text report returned by classification_report.
Report = classification_report(y_test,y_pred)
print(Report)

accuracy 0.9636565624504047
              precision    recall  f1-score   support

    Negative       1.00      0.93      0.96      3175
    Positive       0.93      1.00      0.96      3126

    accuracy                           0.96      6301
   macro avg       0.97      0.96      0.96      6301
weighted avg       0.97      0.96      0.96      6301



In [16]:
# Performing the sentiment analysis with Google Maps only

# Initialize a set to store sentiment scores
# NOTE(review): score_set is not read or written by any code visible in this
# cell — confirm whether a later cell uses it.
score_set = set()

# Google Maps client built from the configured API key.
# NOTE(review): the functions below call the Places REST endpoints directly via
# `requests` and do not reference `gmaps` — confirm whether it is used elsewhere.
gmaps = googlemaps.Client(key=key)

def extract_intersections_within_polygon(road_shapefile, polygon_shapefile):
    """Return the road-to-road crossing points that fall inside a study polygon.

    Both files are loaded with GeoPandas and reprojected to EPSG:4326. Only
    ``LineString`` road geometries that intersect the polygon are kept; every
    pair of those roads is tested, and ``Point`` / ``MultiPoint`` intersections
    contained in the polygon are collected.

    Args:
        road_shapefile: Path to the road network file (read with ``gpd.read_file``).
        polygon_shapefile: Path to a file that must contain exactly one geometry.

    Returns:
        A list of unique ``(latitude, longitude)`` tuples in first-seen order.
        An empty list is returned when the polygon file does not contain exactly
        one geometry, or when any exception is raised (the error is printed and
        not propagated).
    """
    try:
        print("Loading road shapefile...")
        roads_gdf = gpd.read_file(road_shapefile)
        print(f"Road shapefile loaded successfully. Number of geometries: {len(roads_gdf)}")

        print("Loading polygon shapefile...")
        polygon_gdf = gpd.read_file(polygon_shapefile)
        print(f"Polygon shapefile loaded successfully. Number of geometries: {len(polygon_gdf)}")

        # Reproject both shapefiles to EPSG:4326
        print("Reprojecting road shapefile to EPSG:4326...")
        roads_gdf = roads_gdf.to_crs(epsg=4326)
        print("Reprojecting polygon shapefile to EPSG:4326...")
        polygon_gdf = polygon_gdf.to_crs(epsg=4326)
        print("Reprojection completed.")

        # Ensure the polygon shapefile contains one polygon
        if len(polygon_gdf) != 1:
            print("The polygon shapefile should contain exactly one polygon.")
            return []

        polygon = polygon_gdf.geometry.iloc[0]

        # Ensure the road geometries are lines.
        # NOTE(review): MultiLineString roads are dropped by this filter, as in
        # the original code — confirm that is intended for the input network.
        roads_gdf = roads_gdf[roads_gdf.geometry.geom_type == 'LineString']
        print(f"Filtered LineString geometries. Number of LineString geometries: {len(roads_gdf)}")

        # Filter the road lines that intersect with the polygon
        roads_gdf = roads_gdf[roads_gdf.intersects(polygon)]
        print(f"Filtered roads that intersect with the polygon. Number of intersecting roads: {len(roads_gdf)}")

        # Coordinates are collected in first-seen order. `seen` removes
        # duplicates: a junction shared by three or more roads is reported once
        # per road pair, and each duplicate would otherwise cost an extra
        # Nearby Search request downstream.
        coordinates = []
        seen = set()

        def _record_if_inside(point):
            # Keep the point only if the polygon contains it and its exact
            # (lat, lon) pair has not been recorded yet.
            if not polygon.contains(point):
                return
            lat_lon = (point.y, point.x)  # Latitude and Longitude
            if lat_lon not in seen:
                seen.add(lat_lon)
                coordinates.append(lat_lon)

        # Compare each line with every other line to find intersections within the polygon
        total_combinations = len(roads_gdf) * (len(roads_gdf) - 1) // 2
        print(f"Total number of line combinations to check: {total_combinations}")

        count = 0
        for line1, line2 in itertools.combinations(roads_gdf.geometry, 2):
            count += 1
            if count % 1000 == 0:
                print(f"Checked {count} / {total_combinations} combinations")

            if not line1.intersects(line2):
                continue

            intersection = line1.intersection(line2)
            # Only point-like intersections are junctions; overlapping segments
            # (line results) and mixed collections are ignored.
            if intersection.geom_type == 'Point':
                _record_if_inside(intersection)
            elif intersection.geom_type == 'MultiPoint':
                for point in intersection.geoms:
                    _record_if_inside(point)

        print(f"Number of intersections found: {len(coordinates)}")

        return coordinates

    except Exception as e:
        # Best-effort: report the failure and let the caller treat it as "no intersections".
        print(f"An error occurred: {e}")
        return []

def save_intersections_to_txt(coordinates, output_file):
    """Write coordinate pairs to a text file, one ``lat, lon`` line per pair.

    Args:
        coordinates: Iterable of two-element ``(lat, lon)`` items.
        output_file: Destination path; the file is opened in write mode.

    Progress and failures are reported with ``print``; exceptions are caught
    and not propagated to the caller.
    """
    try:
        print("Saving intersections to text file...")
        with open(output_file, 'w') as out:
            # Lines are generated lazily, so writing stops at the first bad item.
            out.writelines(f"{lat}, {lon}\n" for lat, lon in coordinates)
        print(f"Intersections saved to {output_file}")
    except Exception as err:
        print(f"An error occurred while saving to file: {err}")

def _fetch_place_review_texts(place_id, timeout):
    """Return the non-empty review texts from a Place Details lookup for `place_id`."""
    details_params = {
        'place_id': place_id,
        'fields': 'rating,reviews',
        'key': key
    }
    details_url = 'https://maps.googleapis.com/maps/api/place/details/json'
    details_response = requests.get(details_url, params=details_params, timeout=timeout)

    if details_response.status_code != 200:
        return []

    details_payload = details_response.json()
    # The Places API reports request-level failures in the JSON body, not via HTTP status.
    details_status = details_payload.get('status')
    if details_status not in (None, 'OK'):
        print(f"Place Details failed for {place_id} with status {details_status}: "
              f"{details_payload.get('error_message', '')}")
        return []

    reviews = details_payload.get('result', {}).get('reviews', [])
    # Skip reviews without text: they carry nothing for the text classifier.
    return [review['text'] for review in reviews if review.get('text')]


def perform_nearby_search(coordinates, radius, polygon, place_ids_set, reviews_list, timeout=10):
    """Collect Google Places reviews for places found near each coordinate.

    For every ``(lat, lon)`` pair a Nearby Search request is issued. Each
    returned place located inside `polygon` is recorded in `place_ids_set`, and
    its review texts (from a Place Details request) are appended to
    `reviews_list`. A place already in `place_ids_set` is skipped, so a place
    returned by several overlapping search circles contributes its reviews
    only once.

    Args:
        coordinates: Iterable of ``(lat, lon)`` pairs to search around.
        radius: Search radius in meters.
        polygon: Geometry exposing ``contains``; places outside it are ignored.
        place_ids_set: Set mutated in place with the accepted place IDs.
        reviews_list: List mutated in place with the collected review texts.
        timeout: Seconds to wait for each HTTP request (default 10).

    Returns:
        None. Results are delivered through the two mutated containers. Any
        exception stops the search and is printed rather than propagated;
        results gathered before the failure are kept.
    """
    try:
        print("Performing Nearby Search...")

        url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'

        for lat, lon in coordinates:
            params = {
                'location': f'{lat},{lon}',
                'radius': radius,  # Radius in meters
                'key': key
            }

            response = requests.get(url, params=params, timeout=timeout)

            if response.status_code != 200:
                print(f"Error {response.status_code}: {response.text}")
                continue

            payload = response.json()
            # An invalid key or exhausted quota still returns HTTP 200; the
            # failure is only visible in the payload's `status` field.
            api_status = payload.get('status')
            if api_status not in (None, 'OK', 'ZERO_RESULTS'):
                print(f"Nearby Search failed with status {api_status}: "
                      f"{payload.get('error_message', '')}")
                continue

            # NOTE(review): only the first page of results is read; any
            # `next_page_token` in the payload is not followed.
            for place in payload.get('results', []):
                place_id = place['place_id']
                if place_id in place_ids_set:
                    # Already handled via an earlier (overlapping) search circle.
                    continue

                place_location = place['geometry']['location']
                place_point = Point(place_location['lng'], place_location['lat'])
                if not polygon.contains(place_point):
                    continue

                place_ids_set.add(place_id)

                # Fetch place details including reviews
                reviews_list.extend(_fetch_place_review_texts(place_id, timeout))

        print(f"Number of unique Place IDs found within the polygon: {len(place_ids_set)}")

    except Exception as e:
        print(f"An error occurred during Nearby Search: {e}")

# Output file path
intersections_file_path = 'intersections.txt'
places_file_path = 'places.txt'

# Radius for Nearby Search (in meters)
radius = 400

# Extract intersections within the polygon
print("Starting intersection extraction...")
intersections = extract_intersections_within_polygon(road_shapefile_path, polygon_shapefile_path)

# Save intersections to a text file
if intersections:
    save_intersections_to_txt(intersections, intersections_file_path)

    # Load the polygon shapefile again to get the polygon geometry
    # (the extraction function returns coordinates only).
    polygon_gdf = gpd.read_file(polygon_shapefile_path)
    polygon_gdf = polygon_gdf.to_crs(epsg=4326)
    polygon = polygon_gdf.geometry.iloc[0]

    # Perform Nearby Search and save unique Place IDs to a set
    unique_place_ids = set()
    reviews_list = []

    perform_nearby_search(intersections, radius, polygon, unique_place_ids, reviews_list)

    # Score the collected reviews: the sentiment score is the fraction of
    # reviews the classifier labels 'Positive'.
    print("All Reviews:")
    sum_score = 0
    if reviews_list:
        # Vectorize and classify all reviews in one batch. Both steps work row
        # by row, so the predictions match a per-review loop while avoiding one
        # model call per review.
        review_vectors = vectorizer.transform(reviews_list)
        pred_sentiment = rf_clf1.predict(review_vectors)
        sum_score = int((pred_sentiment == 'Positive').sum())

        print("\nSentiment Score: " + str(sum_score / len(reviews_list)))
    else:
        # Guard against division by zero when the search returned no reviews
        # (e.g. invalid API key, no places inside the polygon).
        print("\nNo reviews were retrieved; sentiment score cannot be computed.")

else:
    print("No intersections found within the polygon.")


Collecting googlemaps
  Downloading googlemaps-4.10.0.tar.gz (33 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: googlemaps
  Building wheel for googlemaps (setup.py) ... [?25l[?25hdone
  Created wheel for googlemaps: filename=googlemaps-4.10.0-py3-none-any.whl size=40712 sha256=edb24651b8b0ac64b29bdb996d73bc6fbc58a62d6a113ae2436e16963343ca73
  Stored in directory: /root/.cache/pip/wheels/17/f8/79/999d5d37118fd35d7219ef57933eb9d09886c4c4503a800f84
Successfully built googlemaps
Installing collected packages: googlemaps
Successfully installed googlemaps-4.10.0
Collecting geopandas
  Downloading geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)
Collecting pyogrio>=0.7.2 (from geopandas)
  Downloading pyogrio-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting pyproj>=3.3.0 (from geopandas)
  Downloading pyproj-3.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (31 kB)
Colle