In [7]:
import json
import os
from urllib.parse import urlencode
from urllib.request import Request, urlopen

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

# Dataset for Solar Farms - Harmonised global datasets

Paper Link : https://www.nature.com/articles/s41597-020-0469-8 <br>
Dataset link: https://figshare.com/articles/dataset/Harmonised_global_datasets_of_wind_and_solar_farm_locations_and_power/11310269/2

*"There are no restrictions on the use of these data. The data should be cited as follows: Dunnett, S. Harmonised global datasets of wind and solar farm locations and power. figshare. Dataset. https://doi.org/10.6084/m9.figshare.11310269.v1 (2020)"*

In [8]:
# Read the global solar-farm table, then keep only the rows whose country
# code (GID_0) is 'USA', renumbering the surviving rows from zero.
solar_farms_data_path = 'global_solar_2020.csv'
solar_farms_all = pd.read_csv(solar_farms_data_path)

is_us_site = solar_farms_all['GID_0'].eq('USA')
solar_farms_US = solar_farms_all.loc[is_us_site].reset_index(drop=True)

# Last expression of the cell: summary statistics of the numeric columns.
solar_farms_US.describe()

Unnamed: 0,X,Y,sol_id,panels,panel.area,landscape.area,water,urban,power
count,3949.0,3949.0,3949.0,3949.0,3949.0,3949.0,3949.0,3949.0,1127.0
mean,-8386997.0,4687226.0,14292.681438,153.322867,0.1283,0.244227,0.00076,0.431755,16.367508
std,1695486.0,495681.9,6779.123122,2331.445836,0.74528,1.294819,0.027555,0.495383,38.284127
min,-14565210.0,2554883.0,2.0,1.0,3e-06,0.0,0.0,0.0,0.841058
25%,-10009980.0,4408598.0,10949.0,3.0,0.000141,0.000248,0.0,0.0,2.288516
50%,-8680626.0,4637474.0,11954.0,6.0,0.001866,0.009032,0.0,0.0,4.343822
75%,-6748695.0,5078871.0,20089.0,16.0,0.032051,0.069371,0.0,1.0,13.59026
max,-5524927.0,7472106.0,34238.0,110246.0,22.289277,39.411116,1.0,1.0,551.823434


# Filter required data


In [9]:
# Threshold on the 'panel.area' column; only rows strictly above it are kept.
min_panel_area = 1

exceeds_min_area = solar_farms_US['panel.area'].gt(min_panel_area)
solar_farms_US = solar_farms_US.loc[exceeds_min_area].reset_index(drop=True)
display(solar_farms_US)

Unnamed: 0,X,Y,sol_id,GID_0,panels,panel.area,landscape.area,water,urban,power
0,-1.000846e+07,4.511227e+06,10916,USA,6,1.227272,1.424324,0,0,47.967101
1,-9.096103e+06,4.748488e+06,11003,USA,606,1.245456,1.901137,0,0,48.565182
2,-1.016870e+07,4.163161e+06,11017,USA,8,1.506165,1.598014,0,0,56.999325
3,-1.016205e+07,4.163922e+06,11018,USA,9,1.962525,2.611277,0,0,71.237799
4,-1.016837e+07,4.164224e+06,11019,USA,4,1.132378,1.194879,0,0,44.822683
...,...,...,...,...,...,...,...,...,...,...
110,-7.348833e+06,3.878649e+06,20859,USA,3,1.292202,1.292202,0,0,50.096449
111,-7.414232e+06,4.154596e+06,20967,USA,5,3.927843,3.927843,0,0,127.818296
112,-8.773385e+06,3.756635e+06,34147,USA,641,1.442593,1.442593,0,0,54.965527
113,-8.887567e+06,3.750114e+06,34151,USA,3,3.292225,3.292225,0,0,110.154428


## The (X,Y) above are in "Eckert IV equal-area projection (EPSG:54012)" format.
I couldn't find a direct function to convert these into the usual latitude/longitude format, so I used an API from a website that does the conversion (https://epsg.io/transform#s_srs=3857&t_srs=4326&x=NaN&y=NaN). You can paste the converted coordinates into Google Maps to see an image of the solar site.

A direct function might be possible with pyproj (https://pyproj4.github.io/pyproj/stable/), but I am not sure how to do it.

In [10]:
# NOTE : Order of (X,Y) is flipped at the end because google maps needs it that way

def get_google_map_coordinates(X, Y, s_srs=54012, t_srs=4326, timeout=30):
    """Transform one projected (X, Y) point through the epsg.io web service.

    Parameters
    ----------
    X, Y : float
        Coordinates of the point in the source reference system.
    s_srs : int, optional
        Code of the source reference system sent to epsg.io. Defaults to
        54012, the Eckert IV code this notebook's dataset uses.
    t_srs : int, optional
        Code of the target reference system. Defaults to 4326.
    timeout : float, optional
        Seconds to wait for the HTTP response before ``urlopen`` gives up,
        so a stalled request cannot hang a long batch forever.

    Returns
    -------
    tuple of float
        ``(y, x)`` of the transformed point. The order is flipped on purpose
        because Google Maps expects it that way.

    Raises
    ------
    ValueError
        If the reply does not contain a JSON object with numeric "x" and "y".
    """
    # urlencode escapes every value, unlike manual string concatenation.
    query = urlencode({
        'x': X,
        'y': Y,
        'z': 0,
        's_srs': s_srs,
        't_srs': t_srs,
        # Same callback the notebook has always requested; the JSONP wrapper
        # it produces is stripped below.
        'callback': 'jsonpFunction',
    })
    req = Request(
        url='http://epsg.io/trans?' + query,
        # NOTE(review): presumably the service rejects urllib's default
        # User-Agent - confirm before removing this header.
        headers={'User-Agent': 'Mozilla/5.0'},
    )

    # The context manager closes the connection even if read() fails.
    with urlopen(req, timeout=timeout) as response:
        webpage_str = response.read().decode()

    # Parse the JSON object embedded in the reply instead of splitting on
    # exact quote/spacing patterns, so quoted and bare numbers both work.
    start = webpage_str.find('{')
    end = webpage_str.rfind('}')
    if start == -1 or end < start:
        raise ValueError(
            'Unexpected response from epsg.io: ' + repr(webpage_str[:200])
        )
    try:
        payload = json.loads(webpage_str[start:end + 1])
        x = float(payload['x'])
        y = float(payload['y'])
    except (KeyError, TypeError, ValueError) as exc:
        raise ValueError(
            'Could not read x/y from epsg.io response: ' + repr(webpage_str[:200])
        ) from exc

    return y, x


def new_data_with_google_coordinates(old_data):
    """Return a copy of ``old_data`` with 'X' and 'Y' replaced by converted values.

    Each row's ('X', 'Y') pair is passed to ``get_google_map_coordinates`` and
    the two returned numbers are stored back into 'X' and 'Y' respectively
    (that helper returns the pair in flipped order, so 'X' receives its first
    element). One call is made per row, with a tqdm progress bar.

    Parameters
    ----------
    old_data : pandas.DataFrame
        Table containing 'X' and 'Y' columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``old_data`` with a fresh 0..n-1 index and float64 'X' and
        'Y' columns holding the converted coordinates.
    """
    new_data = old_data.copy().reset_index(drop=True)

    xs_old = new_data['X'].to_numpy()
    ys_old = new_data['Y'].to_numpy()

    converted = [
        get_google_map_coordinates(x_old, y_old)
        for x_old, y_old in tqdm(zip(xs_old, ys_old), total=len(new_data))
    ]

    # Assign each column once instead of writing row by row with .loc: this
    # avoids per-row setitem overhead and never mixes old and new values (or
    # int and float dtypes) in one column while the loop runs.
    new_data['X'] = np.array([pair[0] for pair in converted], dtype=float)
    new_data['Y'] = np.array([pair[1] for pair in converted], dtype=float)

    return new_data

# Get google map coordinates for a single example

In [11]:
# Position of the one site to convert as a quick check of the helper.
ind = 10

X_old = solar_farms_US['X'].to_numpy()[ind]
Y_old = solar_farms_US['Y'].to_numpy()[ind]

# The helper returns the pair in flipped (y, x) order, so X_new holds the
# transformed y value and Y_new the transformed x value.
X_new, Y_new = get_google_map_coordinates(X=X_old, Y=Y_old)
print(X_new, Y_new)

34.82641469953629 -118.40779822579395


# Update a whole dataset with google map coordinates

In [12]:
# Convert every remaining US site; this issues one web request per row, so
# the cell takes a while to finish.
old_data = solar_farms_US
new_data = new_data_with_google_coordinates(old_data=old_data)

100%|█████████████████████████████████████████████████████████████| 115/115 [04:11<00:00,  2.19s/it]


In [13]:
# Build the file name from the panel-area threshold used for filtering so the
# name cannot drift from the data it describes (min_panel_area = 1 yields the
# same file name as before).
new_data.to_csv(f"US_solar_sites_2020_min_panel_area_{min_panel_area}_.csv")

In [33]:
# Print one "X Y" pair per row so each line can be pasted into Google Maps.
converted_pairs = zip(new_data['X'].to_numpy(), new_data['Y'].to_numpy())
for ind, (x_value, y_value) in enumerate(converted_pairs):
    print(x_value, y_value)

35.62432928459123 -115.34799718448903
37.68505175417948 -105.88489981979211
32.6594166668723 -115.65519910784167
32.66583084634074 -115.58272498766743
32.66837580205608 -115.6558284498562
32.69066266776989 -115.59543070167149
34.8522257765087 -118.35737254605496
34.68898678850267 -118.31019482539438
34.71896014145873 -118.28618932351482
34.826156265125434 -118.4227959572703
34.82641469953629 -118.40779822579395
34.6601951301126 -118.29665830090046
44.47255793699714 -95.66239297450271
45.488378503118895 -92.89203344987659
35.3225721169273 -118.81488118433742
35.81538940418605 -119.45545503921748
35.874864865594816 -119.45701561226313
34.829343222248596 -79.95024329873699
26.86440782213034 -81.74005516581298
35.36704239451963 -120.02986385098873
35.34859169583896 -120.05816870805636
36.581593251724755 -120.41597108729059
36.581424678809164 -120.397985404619
36.568359852710614 -120.39951963407485
36.39373882931366 -120.23973128629207
35.37161604877933 -120.05941329732576
35.03228200245083