In [1]:
!pip install geopy

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import folium
from geopy.geocoders import Nominatim
from collections import Counter

def get_coordinates(location_name):
    geolocator = Nominatim(user_agent="location_converter")
    location = geolocator.geocode(location_name)
    if location:
        return location.latitude, location.longitude
    else:
        return None

def plot_locations_on_map(locations):
    coordinates_dict = {}
    for location in locations:
        coordinates = get_coordinates(location)
        if coordinates:
            coordinates_dict[location] = coordinates

    # Count frequency of each location
    location_counter = Counter(locations)

    # Create a color scale based on frequency
    max_frequency = max(location_counter.values())
    color_scale = {location: 'red' if count == 1 else 'blue' if count <= max_frequency / 2 else 'green' for location, count in location_counter.items()}

    # Create map
    map = folium.Map(location=[0, 0], zoom_start=2)

    # Plot locations with different colors based on frequency
    for location, coordinates in coordinates_dict.items():
        color = color_scale[location]
        folium.Marker(location=coordinates, popup=location, icon=folium.Icon(color=color)).add_to(map)

    # Save map as HTML file
    map.save('locations_map.html')
    print("Map saved as 'locations_map.html'")


    # Sample array of locations
locations = ["New York", "New York", "London", "Paris", "Paris", "Paris", "Tokyo", "Tokyo", "Tokyo", "Tokyo", "Tokyo"]
    
plot_locations_on_map(locations)

Map saved as 'locations_map.html'


In [2]:
import spacy
from spacy.matcher import PhraseMatcher

def extract_locations(text):
    nlp = spacy.load("en_core_web_lg")
    matcher = PhraseMatcher(nlp.vocab)
    # Add custom location names to the matcher
    location_patterns = ['Gandhinagar']
    for loc in location_patterns:
        matcher.add("LOCATION", None, nlp(loc))

    doc = nlp(text)
    locations = []
    matches = matcher(doc)
    for match_id, start, end in matches:
        span = doc[start:end]
        locations.append(span.text)
    return locations

paragraph = """
Gandhinagar Tokyo UN reports tokyo Leogane 80-90 destroyed.
"""

locations = extract_locations(paragraph)
print("Locations mentioned in the paragraph:")
for location in locations:
    print(location)


Locations mentioned in the paragraph:
Gandhinagar


In [9]:
import spacy
from spacy.matcher import PhraseMatcher

def extract_locations(text):
    nlp = spacy.load("en_core_web_sm")
    matcher = PhraseMatcher(nlp.vocab)
    # Add custom location names to the matcher
    location_patterns = ['Gandhinagar']
    for loc in location_patterns:
        matcher.add("LOCATION", None, nlp(loc))

    doc = nlp(text)
    locations = []
    
    # Extract locations using NER
    for ent in doc.ents:
        if ent.label_ == 'GPE' or ent.label_ == 'LOC':
            locations.append(ent.text)

    # Extract additional locations using the custom matcher
    matches = matcher(doc)
    for match_id, start, end in matches:
        span = doc[start:end]
        locations.append(span.text)
    
    return locations

paragraph = """
UN statue of liberty reports iraq tokyo Leogane Gandhinagar 80-90 destroyed.
"""

locations = extract_locations(paragraph)
print("Locations mentioned in the paragraph:")
for location in locations:
    print(location)


Locations mentioned in the paragraph:
iraq
tokyo
Gandhinagar


In [12]:
import spacy
import pandas as pd
from geopy.geocoders import Nominatim


def extract_locations_from_csv(csv_file_path, message_column_name):
    df = pd.read_csv(csv_file_path)

    locations = []

    nlp = spacy.load("en_core_web_sm")

    def extract_locations(text):
        doc = nlp(text)
        locations = []
        for ent in doc.ents:
            if ent.label_ == 'GPE' or ent.label_ == 'LOC':  
                locations.append(ent.text)
        return locations

    for message in df[message_column_name]:
        message_locations = extract_locations(message)
        locations.extend(message_locations)

    return locations

csv_file_path = 'D:/IIITR Class Docs/6th Sem/Mini Project/Code/0f_iraq_iran.csv'
message_column_name = "text"  # Change this to the name of the column containing messages in your CSV file

locations = extract_locations_from_csv(csv_file_path, message_column_name)
print("Locations mentioned in the CSV file:")
for location in locations:
    print(location)


Locations mentioned in the CSV file:
Iran
Iraq
Iran
Iran
Iraq
Erbil
Iraq
Iraq
Iraq
Erbil
Iraq
Iraq
Iraq
Erbil
Iraq
Iraq
Iraq
Erbil
Iraq
Iraq
Iran
Iraq
Iran
Iran
Iraq
Iran
Iran
Iraq
Turkey
Iraq
Iraq
Iran
Iraq
Iran
Iraq
Iran
Iraq
Iran
Iraq
Iraq
Iraq
Iran
Iran
Iran
Iraq
Iran
Najaf
Iraq
Iran
Baghdad
Iran
Iran
Iran
Iraq
Iran
Iraq
Pakistan
Iran
Iraq
kuwait
Iran
Iraq
Halabja
Sulaymaniyah
Iraq
Halabja
https://t.co/6MaFWai7Vb
Israel
Iraq
Iran
IRGC
Iran
Western Iran
Iraq
Iraq
Iran
Iran
Iraq
Iraq
Iran
Bhuj
Haiti
Iraq
Iraq
Iraq
Kurdistan
Kurdistan
Iran
Kurdistan
Kurdistan
Iran
Kurdistan
Kurdistan
Iran
Kurdistan
Kurdistan
Iran
Iran
Iran
Iran
Iran
Iran
Iran
Iran
the Middle East
Iran
iraq
Iraq
Iran
Iran
Iraq
Iran
Iraq
Iraq
Iran
Tehran
Iran
Tehran
Iran
Tehran
Iran
Tehran
Iran
Turkey
Iraq
Iran
Russia
Iran
Iraq
Iran
Iran
kuwait
Iraq
Iran
Iran
Iraq
Iran
https://t.co/UdJYSo8MmL
Iraq
Iran
Iran
Iraq
Iran
Iran
Kermanshah Province
Iran
Kermanshah Province
Iran
Kermanshah Province
Iran
U.S.
Iran
Iraq
Algeria
I

In [22]:
import streamlit as st
import pandas as pd
import folium
from streamlit_folium import folium_static
from geopy.geocoders import Nominatim
from collections import Counter
import spacy
from spacy.matcher import PhraseMatcher
import itertools


def extract_locations(text):
    nlp = spacy.load("en_core_web_sm")
    matcher = PhraseMatcher(nlp.vocab)
    # Add custom location names to the matcher
    location_patterns = []
    for loc in location_patterns:
        matcher.add("LOCATION", None, nlp(loc))

    doc = nlp(text)
    
    # Extract locations using NER
    for ent in doc.ents:
        if ent.label_ == 'GPE' or ent.label_ == 'LOC':
            locations.append(ent.text)
            print("loc found!")
    # Extract additional locations using the custom matcher
    matches = matcher(doc)
    for match_id, start, end in matches:
        span = doc[start:end]
        locations.append(span.text)
        
    return locations

def get_coordinates(location_name):
    geolocator = Nominatim(user_agent="location_converter")
    location = geolocator.geocode(location_name)
    if location:
        return location.latitude, location.longitude
    else:
        return None

def get_color(frequency, max_frequency):
    if frequency == 1:
        return 'green'
    elif frequency == 2:
        return 'blue'
    else:
        return 'red'

df = pd.read_csv("D:/IIITR Class Docs/6th Sem/Mini Project/Code/preprocessed_0f_iraq_iran.csv")
df['processed_text'] = df['processed_text'].astype(str)
df['locations'] = df['processed_text'].apply(extract_locations)

locations = df['locations'].tolist()
locations = list(itertools.chain.from_iterable(locations))

print("All locations found:")
print(locations)

csv_file_path = r'D:/IIITR Class Docs/6th Sem/Mini Project/Code/preprocessed_0f_iraq_iran.csv'
message_column_name = "processed_text" 


loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!
loc found!

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [None]:
location_name = input("Enter the name of the location: ")
coordinates = get_coordinates(location_name)
if coordinates:
    print(f"Coordinates of {location_name}: Latitude {coordinates[0]}, Longitude {coordinates[1]}")
else:
    print(f"Couldn't find coordinates for {location_name}")
