In [48]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [51]:
#http://srome.github.io/Parsing-HTML-Tables-in-Python-with-BeautifulSoup-and-pandas/
class HTMLTableParser:
    """Download a web page and convert one of its HTML tables to a DataFrame.

    ``parse_url`` fetches the page and selects a ``<table>`` by its
    ``summary`` attribute; ``parse_html_table`` does the actual conversion
    and can be used on any parsed table element.
    """

    # ``summary`` attribute of the table that ``parse_url`` extracts by default.
    DEFAULT_SUMMARY = 'Monthly Averaged Radiation Incident On An Equator-Pointed Tilted Surface '

    def parse_url(self, url, summary=DEFAULT_SUMMARY, timeout=30):
        """Fetch ``url`` and return the first table whose summary matches.

        Parameters
        ----------
        url : str
            Address of the page to download.
        summary : str, optional
            Value of the ``summary`` attribute identifying the wanted table.
            Leading/trailing whitespace is ignored on both sides.
        timeout : float, optional
            Seconds to wait for the server before giving up.

        Returns
        -------
        pandas.DataFrame
            The parsed table (see ``parse_html_table``).

        Raises
        ------
        requests.HTTPError
            If the server answers with an error status.
        IndexError
            If no table on the page carries the requested summary.
        """
        # Without a timeout a stalled server would hang the kernel forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')

        wanted = summary.strip()
        for table in soup.find_all('table'):
            # .get() tolerates tables that have no ``summary`` attribute at
            # all; indexing with table['summary'] would raise KeyError.
            if (table.get('summary') or '').strip() == wanted:
                return self.parse_html_table(table)

        # Same exception type as indexing an empty list, but with a message.
        raise IndexError("No table with summary %r found at %s" % (wanted, url))

    def parse_html_table(self, table):
        """Convert a parsed ``<table>`` element into a DataFrame.

        Every ``<tr>`` containing at least one ``<td>`` becomes a row. The
        ``<th>`` texts of the first row that has any are used as column
        names; otherwise columns are numbered from 0. Rows shorter than the
        widest row are padded with NaN. Columns whose values can all be read
        as floats are converted to float; the others keep their text.

        Parameters
        ----------
        table : object
            Table element exposing ``find_all('tr')``, whose rows expose
            ``find_all('td')`` / ``find_all('th')`` and whose cells expose
            ``get_text()``.

        Returns
        -------
        pandas.DataFrame

        Raises
        ------
        Exception
            If column titles were found but their count differs from the
            number of columns.
        """
        column_names = []
        body_rows = []

        # Single pass: collect the cell texts and the first header row seen.
        for row in table.find_all('tr'):
            cells = [td.get_text() for td in row.find_all('td')]
            if cells:
                body_rows.append(cells)
            if not column_names:
                column_names = [th.get_text() for th in row.find_all('th')]

        # Size the frame by the widest row so ragged tables cannot overflow.
        n_columns = max((len(cells) for cells in body_rows), default=0)

        # Safeguard on column titles
        if column_names and len(column_names) != n_columns:
            raise Exception("Column titles do not match the number of columns")

        columns = column_names if column_names else range(n_columns)
        missing = float('nan')
        padded = [cells + [missing] * (n_columns - len(cells))
                  for cells in body_rows]
        # dtype=object keeps the raw text until the explicit conversion below.
        df = pd.DataFrame(padded, columns=columns, dtype=object)

        # Convert to float where possible; text columns are left untouched.
        for col in df:
            try:
                df[col] = df[col].astype(float)
            except (ValueError, TypeError):
                pass

        return df

In [53]:
# Request URL, with the query string kept exactly as copied; the adjacent
# literals below are joined by Python into one unchanged string.
url = (
    "https://eosweb.larc.nasa.gov/cgi-bin/sse/grid.cgi?"
    "&num=190131&lat=40.92&submit=Submit"
    "&hgt=100&veg=17&sitelev="
    "&email=skip@larc.nasa.gov"
    "&p=grid_id&p=ret_tlt0&step=2&lon=9.49"
)
hp = HTMLTableParser()
# parse_url returns the first table matching the summary, as a DataFrame.
df = hp.parse_url(url)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,Lat 40.92 Lon 9.49,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,AnnualAverage
1,SSE HRZ,1.77,2.63,3.93,4.78,5.90,6.97,7.19,6.15,4.61,3.08,1.96,1.53,4.21
2,K,0.42,0.46,0.51,0.49,0.53,0.60,0.63,0.60,0.55,0.48,0.42,0.40,0.51
3,Diffuse,0.80,1.09,1.47,1.96,2.23,2.17,1.94,1.78,1.55,1.20,0.87,0.71,1.48
4,Direct,2.83,3.62,4.75,4.73,5.76,7.41,8.16,7.09,5.56,4.11,2.97,2.55,4.97
5,Tilt 0,1.76,2.56,3.89,4.76,5.86,6.99,7.13,6.12,4.54,3.05,1.94,1.52,4.19
6,Tilt 25,2.55,3.36,4.62,5.05,5.83,6.67,6.99,6.38,5.22,3.91,2.72,2.28,4.64
7,Tilt 40,2.85,3.61,4.75,4.90,5.44,6.08,6.41,6.09,5.27,4.16,3.01,2.59,4.60
8,Tilt 55,3.00,3.67,4.63,4.51,4.78,5.19,5.52,5.49,5.04,4.19,3.14,2.75,4.33
9,Tilt 90,2.71,3.06,3.47,2.93,2.74,2.73,2.90,3.27,3.56,3.41,2.77,2.53,3.01


In [74]:
# The scraped frame carries its header in row 0 and its row labels in
# column 0, so every cell is still text. Promote both to real axes and
# convert the remaining cells to numbers so df1 can be used for arithmetic.
column_labels = df.values[0][1:]
row_labels = df.values[1:, 0]
df1 = (
    pd.DataFrame(df.values[1:, 1:], index=row_labels, columns=column_labels)
    # Non-numeric cells become NaN instead of aborting the cell.
    .apply(pd.to_numeric, errors="coerce")
)
df1

Unnamed: 0,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,AnnualAverage
SSE HRZ,1.77,2.63,3.93,4.78,5.9,6.97,7.19,6.15,4.61,3.08,1.96,1.53,4.21
K,0.42,0.46,0.51,0.49,0.53,0.6,0.63,0.6,0.55,0.48,0.42,0.4,0.51
Diffuse,0.8,1.09,1.47,1.96,2.23,2.17,1.94,1.78,1.55,1.2,0.87,0.71,1.48
Direct,2.83,3.62,4.75,4.73,5.76,7.41,8.16,7.09,5.56,4.11,2.97,2.55,4.97
Tilt 0,1.76,2.56,3.89,4.76,5.86,6.99,7.13,6.12,4.54,3.05,1.94,1.52,4.19
Tilt 25,2.55,3.36,4.62,5.05,5.83,6.67,6.99,6.38,5.22,3.91,2.72,2.28,4.64
Tilt 40,2.85,3.61,4.75,4.9,5.44,6.08,6.41,6.09,5.27,4.16,3.01,2.59,4.6
Tilt 55,3.0,3.67,4.63,4.51,4.78,5.19,5.52,5.49,5.04,4.19,3.14,2.75,4.33
Tilt 90,2.71,3.06,3.47,2.93,2.74,2.73,2.9,3.27,3.56,3.41,2.77,2.53,3.01
OPT,3.02,3.67,4.75,5.05,5.95,6.99,7.2,6.4,5.28,4.21,3.14,2.77,4.88


In [None]:
# TODO: build the request URL from its parameters (e.g. lat, lon) instead of hard-coding it. Example URLs:
#https://eosweb.larc.nasa.gov/cgi-bin/sse/grid.cgi?email=skip%40larc.nasa.gov&step=1&lat=40.92&lon=9.49&submit=Submit
#https://eosweb.larc.nasa.gov/cgi-bin/sse/grid.cgi?&num=190131&lat=40.92&submit=Submit&hgt=100&sitelev=&p=grid_id&p=ret_tlt0&step=2&lon=9.46