---
### Importing Required Python Packages
---

In [None]:
# Importing packages

import time
import warnings

import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim

---
### Connecting Google Colab with Google Drive
---

In [None]:
# Mount Google Drive into the Colab runtime so the project files are reachable

from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)
print('Successfully Connected')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Successfully Connected


---
### Data Set Reading and Basic Exploration

* Read the data set into a data frame and save it as the variable location_df
* Check the first five records
* Get the shape of the data frame
* Get the column names
* Get the column types
* Check for any missing values
---

In [None]:
# Load the region-level CSV from the mounted Drive folder into location_df,
# then preview its first five rows

region_csv_path = '/content/drive/MyDrive/Project_Data_Sets/India_Region_Location.csv'
location_df = pd.read_csv(region_csv_path)
location_df.head(5)

Unnamed: 0,Regions,Frequency,StateNames
0,Anantapur,3,Andhra Pradesh
1,Chittoor,3,Andhra Pradesh
2,Cuddapah,6,Andhra Pradesh
3,East Godavari,5,Andhra Pradesh
4,Gangavaram,1,Andhra Pradesh


In [None]:
# Get the shape of location_df as (number of rows, number of columns)

location_df.shape

(620, 3)

In [None]:
# List the column labels of location_df

location_df.columns

Index(['Regions', 'Frequency', 'StateNames'], dtype='object')

In [None]:
# Show the dtype of each column of location_df

location_df.dtypes

Regions       object
Frequency      int64
StateNames    object
dtype: object

In [None]:
# Count the missing values in each column of location_df

location_df.isna().sum()

Regions       0
Frequency     0
StateNames    0
dtype: int64

---
### Data Manipulation and Extraction

* Keep only the regions that have a 'Frequency' greater than 5
* Store the filtered data frame as mainlocations
* Export the data frame as 'India_Main_Location.csv'
---

In [None]:
# Keep only the regions whose 'Frequency' is greater than 5
# and store the filtered data frame as mainlocations

is_frequent = location_df['Frequency'].gt(5)
mainlocations = location_df.loc[is_frequent]
mainlocations.shape

(66, 3)

In [None]:
# Keep the 'Frequency' column of mainlocations in its own variable
# NOTE(review): freq is not used in the cells visible here — confirm a later cell needs it

freq = mainlocations['Frequency']

In [None]:
# Check the distinct entries of the 'Regions' column in the mainlocations data frame

mainlocations['Regions'].unique()

array(['Cuddapah', 'Guntur', 'Krishna', 'Nellore', 'Prakasam',
       'Vishakhapatnam', 'West Godavari', 'Barpeta', 'Bongaigaon',
       'Cachar', 'Darrang', 'Dhemaji', 'Dhubri', 'Dibrugarh', 'Goalpara',
       'Golaghat', 'Jorhat', 'Kamrup', 'Karimganj', 'Kokrajhar',
       'Marigaon', 'Nagaon', 'Nalbari', 'Sibsagar', 'Sonitpur',
       'Tinsukia', 'Araria', 'Bhagalpur', 'Darbhanga', 'Gopalganj',
       'Katihar', 'Kisanganj', 'Madhepura', 'Madhubani', 'Muzaffarpur',
       'Nalanda', 'Patna', 'Purnea', 'Saharsa', 'Saran', 'Sitamarhi',
       'Supaul', 'Vaishali', 'Surat', 'Baleshwar', 'Bhadrak',
       'Jagatsinghpur', 'Jajpur', 'Kendrapara', 'Puri', 'Chennai',
       'Cuddalore', 'Nagapattinam', 'Hyderabad', 'Karimnagar', 'Khammam',
       'Mahbubnagar', 'Nalgonda', 'BaraBanki', 'Faizabad', 'Gonda',
       'Lakhimpur', 'North 24 Parganas', 'Hooghly', 'Kolkata',
       'South 24 Parganas'], dtype=object)

In [None]:
# Re-check the shape of mainlocations (rows, columns) before exporting it

mainlocations.shape

(66, 3)

---

---

In [None]:
# Write mainlocations to Drive as 'India_Main_Location.csv', leaving out the row index

file_path = '/content/drive/MyDrive/Project_Data_Sets/India_Main_Location.csv'
mainlocations.to_csv(path_or_buf=file_path, index=False)

In [None]:
# Initialize the Nominatim geocoder client that get_lat_long queries;
# the user_agent string identifies this application to the service

geolocator = Nominatim(user_agent="indian_region_locator")

In [None]:
def get_lat_long(region_name, timeout=10):
    """Look up the latitude and longitude of an Indian region.

    The query sent to the module-level ``geolocator`` is
    ``"<region_name>, India"``.

    Parameters
    ----------
    region_name : str
        Name of the region to geocode.
    timeout : float, optional
        Seconds to wait for the geocoding service before giving up on this
        lookup. Defaults to 10.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` when a match is found, otherwise
        ``(None, None)``. ``(None, None)`` is also returned for a missing or
        blank name and when the service times out or is unavailable.

    Notes
    -----
    NOTE(review): the query carries no state name, so a region name shared by
    several places may resolve to an unintended one — consider adding the
    state to the query.
    """
    # A missing value (None/NaN) or a blank name cannot be geocoded; skip the
    # network call instead of failing on the string concatenation below.
    if not isinstance(region_name, str) or not region_name.strip():
        return (None, None)

    # Imported here so the function only needs geopy once a lookup is made.
    from geopy.exc import GeocoderTimedOut, GeocoderUnavailable

    try:
        location = geolocator.geocode(region_name + ", India", timeout=timeout)
    except (GeocoderTimedOut, GeocoderUnavailable) as exc:
        # One transient failure should not abort a whole batch of lookups:
        # report it and let the caller record the region as unresolved.
        warnings.warn(f"Geocoding failed for {region_name!r}: {exc}")
        return (None, None)

    if location:
        return (location.latitude, location.longitude)
    return (None, None)

In [49]:
# Minimum pause between two lookups. The public Nominatim service allows at
# most one request per second, so the requests must not be sent back to back.
NOMINATIM_MIN_DELAY_SECONDS = 1.0

# Collect one record per region; the DataFrame is built once at the end
data = []

# Get latitude and longitude for each region in mainlocations
for region in mainlocations['Regions']:
    lat, long = get_lat_long(region)
    data.append({"Region": region, "Latitude": lat, "Longitude": long})
    # Throttle before the next request to stay within the service's rate limit
    time.sleep(NOMINATIM_MIN_DELAY_SECONDS)

# Create a DataFrame from the collected records
df = pd.DataFrame(data)

df.head()



Unnamed: 0,Region,Latitude,Longitude
0,Cuddapah,14.49566,78.834928
1,Guntur,16.291519,80.454159
2,Krishna,16.213991,81.025728
3,Nellore,14.449372,79.987376
4,Prakasam,15.658974,79.471644
