In [None]:
"""
todo:
- home button??
- retrieve stuff
"""

In [None]:




import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import pickle

# Load fossil occurrences for every clade, either from a local pickle cache
# or from the Paleobiology Database (PBDB) CSV service.
fname = '/content/drive/MyDrive/paleo/df_all.pkl'
fromfile = True  # set to False to force a fresh download from PBDB
df_all = {}
clades = ["dinosauria", "trilobita", "mammalia", "pterosauria", "ichthyosauria", "plesiosauria",
          "mosasaurus", "synapsida", "crocodylomorpha", "ammonites"]

if fromfile:
  try:
    # NOTE: pickle.load can execute arbitrary code; only load caches written by this notebook.
    with open(fname, 'rb') as file:
      df_all = pickle.load(file)
  except FileNotFoundError:
    # No cache yet (e.g. first run): fall back to downloading instead of crashing.
    print('cache file not found, downloading from PBDB instead: ', fname)
    fromfile = False

if not fromfile:
  # Load from pbdb info and coordinates
  for clade in clades:
    # URL of the CSV occurrence list for this clade; min_ma=1 is the minimum-age
    # filter in Ma, and show=class,coords adds classification and coordinates.
    url = 'https://paleobiodb.org/data1.2/occs/list.csv?base_name='+\
      clade + '&min_ma=1&show=class,coords'
    # Read the data directly into a pandas DataFrame
    df_all[clade] = pd.read_csv(url)
  # save to file for later
  with open(fname, 'wb') as file:
    pickle.dump(df_all, file)


# Quick look at what was loaded
for clade in clades:
  print(clade,': ',df_all[clade].shape)
for clade in clades:
  print(df_all[clade].head(3))


# Round the latitude and longitude columns to 2 decimal places so that
# nearby occurrences collapse onto the same map location
df_rounded = {}
for clade in clades:
  df_rounded[clade] = df_all[clade].round({'lat': 2, 'lng': 2})

print(df_rounded[clades[0]].columns)


# just keep relevant columns
keep_columns = ['lat', 'lng', 'accepted_name', 'max_ma', 'collection_no']
df_thin = {}
for clade in clades:
  df_thin[clade] = df_rounded[clade][keep_columns]

print(df_thin[clades[0]].columns)

# aggregate by location, so no duplicates: each (lat, lng) pair becomes one row
# whose other columns hold the list of distinct values seen at that location
df_aggregated = {}
for clade in clades:
  df = df_thin[clade]
  df_aggregated[clade] = df.groupby(['lat', 'lng']).agg({
    'accepted_name': lambda x: list(set(x)),
    'max_ma': lambda x: list(set(x)),
    'collection_no': lambda x: list(set(x))
  }).reset_index()

for clade in clades:
  print(clade, df_aggregated[clade].shape)
print(df_aggregated[clades[0]].head(3))
print(df_aggregated[clades[0]].columns)


# Report the longest aggregated list per column (first clade only)
for clade in clades[:1]:
  print(clade,len(max(df_aggregated[clade]['accepted_name'], key=len)))
  print(clade,len(max(df_aggregated[clade]['max_ma'], key=len)))
  print(clade,len(max(df_aggregated[clade]['collection_no'], key=len)))


# split rows with too many names
def split_rows(df, max_names=5):
    """Split rows whose 'accepted_name' list is too long into several rows.

    Each row with more than ``max_names`` names is replaced by consecutive
    copies of itself, each carrying one chunk of at most ``max_names`` names.
    All other columns are copied unchanged, and every copy keeps the index
    label of the row it came from. Rows at or under the limit pass through.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have an 'accepted_name' column whose cells are sequences.
    max_names : int, optional
        Maximum number of names per output row (default 5).

    Returns
    -------
    pandas.DataFrame
        The expanded frame; for empty input, an empty frame with the same columns.

    Raises
    ------
    ValueError
        If ``max_names`` is smaller than 1.
    """
    if max_names < 1:
        raise ValueError('max_names must be at least 1')

    new_rows = []
    for _, row in df.iterrows():
        names = row['accepted_name']
        if len(names) > max_names:
            # Start at 0 so the first chunk is kept; starting at max_names
            # would silently drop the first names of every long row.
            for start in range(0, len(names), max_names):
                new_row = row.copy()
                new_row['accepted_name'] = names[start:start + max_names]
                new_rows.append(new_row)
        else:
            new_rows.append(row)

    if not new_rows:
        # Keep the column layout so downstream column access still works.
        return pd.DataFrame(columns=df.columns)
    return pd.DataFrame(new_rows)

fname = '/content/drive/MyDrive/paleo/df_split.pkl'
fromfile = True  # set to False to recompute df_split from df_aggregated

if fromfile:
  try:
    # NOTE: pickle.load can execute arbitrary code; only load caches written by this notebook.
    with open(fname, 'rb') as file:
      df_split = pickle.load(file)
  except FileNotFoundError:
    # No cache yet (e.g. first run): recompute instead of crashing.
    print('cache file not found, recomputing: ', fname)
    fromfile = False

if not fromfile:
  # Convert the aggregated DataFrame to a split DataFrame
  df_split = {}
  for clade in df_aggregated:
    df_split[clade] = split_rows(df_aggregated[clade])
  # save to file for later
  with open(fname, 'wb') as file:
    pickle.dump(df_split, file)



# Build a per-clade lookup table from accepted name to taxonomic order.
# *** we will use order since it seems to exist and in Wikipedia usually
df_names = {}
for clade in clades:
    df_names[clade] = df_all[clade].groupby('accepted_name').agg({
        'order': 'first'  # Take the first occurrence
    }).reset_index()

print(df_names[clades[1]].shape)
print(df_names[clades[1]].head(3))



In [None]:
# HTML snippet holding the Google tag (gtag.js) loader/config and the AdSense
# loader script. This cell only defines the string; nothing is attached to a
# map here.
gtag_script = """
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-2RB0G9NLE8"></script>
<script>
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());

  gtag('config', 'G-2RB0G9NLE8');
</script>

<script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-4771147286272001"
     crossorigin="anonymous"></script>
"""

In [None]:
# TODO: fetch the Wikipedia summaries in advance so the map needs less live JavaScript.

# Known issue: works well, but does not report when a name is not found.

import folium
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
from folium.plugins import LocateControl, MarkerCluster
from branca.element import Element

def mk_map(df_split, tclades, mindate, maxdate, df_names):
    """Build, save and return a Folium map of fossil sites for the given clades.

    Each qualifying row becomes a clustered marker whose popup lists its
    ages, links to its PBDB collections, and clickable taxon names that load
    a Wikipedia summary through the embedded ``fetchWikipediaData`` script.

    Parameters
    ----------
    df_split : dict
        Maps clade name -> DataFrame with 'lat', 'lng', 'accepted_name',
        'max_ma' and 'collection_no' columns; the last three hold a list per row.
    tclades : list of str
        Clades to plot. Marker colour is chosen by position in this list.
    mindate, maxdate : number
        Inclusive bounds; a row is plotted when the FIRST entry of its
        'max_ma' list lies within them.
    df_names : dict
        Maps clade name -> DataFrame with 'accepted_name' and 'order' columns.

    Returns
    -------
    folium.Map
        The map. It is also saved as an HTML file under
        '/content/drive/MyDrive/paleomapstuff/', named from the clades and dates.
    """
    # Convert each clade's DataFrame to a GeoDataFrame with point geometry
    geo_frames = {}
    for clade in tclades:
        points = [Point(xy) for xy in zip(df_split[clade]['lng'], df_split[clade]['lat'])]
        geo_frames[clade] = gpd.GeoDataFrame(df_split[clade], geometry=points)

    # Build one name -> order dict per clade up front instead of scanning
    # df_names for every single name. setdefault keeps the first occurrence.
    order_lookup = {}
    for clade in tclades:
        names_df = df_names[clade]
        lookup = {}
        for acc_name, acc_order in zip(names_df['accepted_name'], names_df['order']):
            lookup.setdefault(acc_name, acc_order)
        order_lookup[clade] = lookup

    # Create a Folium map centered on California with scroll wheel zoom enabled
    m = folium.Map(location=[36.7783, -119.4179], zoom_start=6, control_scale=True, scrollWheelZoom=True)

    # Add LocateControl to the map
    LocateControl().add_to(m)

    # Define a set of colors for the clades
    colors = ['blue', 'green', 'red', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'lightblue']

    # JavaScript function to fetch Wikipedia data including the image
    js_function = """
        function fetchWikipediaData(name, order, elementId) {
        let simpleWikiUrl = 'https://simple.wikipedia.org/api/rest_v1/page/summary/' + name;
        let enWikiUrl = 'https://en.wikipedia.org/api/rest_v1/page/summary/' + name;

        fetch(simpleWikiUrl)
            .then(response => {
                if (response.ok) {
                    return response.json();
                } else {
                    return fetch(enWikiUrl);
                }
            })
            .then(response => {
                if (response.ok) {
                    return response.json();
                } else {
                    // If the name fetch failed, try fetching the order
                    let orderUrl = 'https://en.wikipedia.org/api/rest_v1/page/summary/' + order;
                    return fetch(orderUrl).then(response => {
                        if (response.ok) {
                            return response.json();
                        } else {
                            throw new Error('No information found');
                        }
                    });
                }
            })
            .then(data => {
                let content = '';
                if (data.thumbnail && data.thumbnail.source) {
                    content += '<img src="' + data.thumbnail.source + '" width="150"><br>';
                }
                if (data.title.toLowerCase() === name.toLowerCase()) {
                    content += `<b>${name}</b><br>`;
                    content += data.extract + '<br>';
                    content += '<a href="https://en.wikipedia.org/wiki/' + name + '" target="_blank">Read more on Wikipedia</a>';
                } else {
                    content += `'${name}' not found in wikipedia. Showing the order.<br>`;
                    content += `<b>${data.title}</b> (Order Information)<br>`;
                    content += data.extract + '<br>';
                    content += '<a href="https://en.wikipedia.org/wiki/' + order + '" target="_blank">Read more about the order on Wikipedia</a>';
                }
                document.getElementById(elementId).innerHTML = content;
            })
            .catch(error => {
                console.error('Error fetching data from Wikipedia API:', error);
                document.getElementById(elementId).innerHTML = `Error: No information found for '${name}' or its order '${order}'`;
            });

        // Highlight the clicked name
        var nameElement = document.getElementById('name-' + name.replace(/ /g, '_'));
        if (nameElement) {
            nameElement.style.fontWeight = 'bold';
        }
    }
    """

    # Add JavaScript function to the map
    m.get_root().html.add_child(folium.Element(f'<script>{js_function}</script>'))

    # Add points to the MarkerCluster without layers
    marker_cluster = MarkerCluster().add_to(m)
    for position, clade in enumerate(tclades):
        color = colors[position % len(colors)]  # cycle through the palette
        orders = order_lookup[clade]
        for idx, row in geo_frames[clade].iterrows():
            # NOTE(review): only the first 'max_ma' entry is tested, so a row
            # whose other ages fall inside the window is still skipped.
            if row['max_ma'][0] >= mindate and row['max_ma'][0] <= maxdate:
                # Prepare the HTML content for the popup
                element_id = f'wiki-content-{idx}'
                max_ma_list = ', '.join(map(str, row['max_ma']))
                collection_no_links = ''.join([f'<a href="https://paleobiodb.org/classic/basicCollectionSearch?collection_no={no}" target="_blank">collection</a><br>' for no in row['collection_no']])

                name_parts = []
                for name in row['accepted_name']:
                    if pd.notna(name):
                        # Names missing from df_names get 'unknown' rather than an IndexError.
                        js_name = str(name).replace(" ", "_")
                        js_order = str(orders.get(name, 'unknown')).replace(" ", "_")
                        # NOTE(review): names are inserted unescaped; a quote in a
                        # name would break the onclick handler.
                        name_parts.append(
                            f'<a id="name-{js_name}" href="#" '
                            f'onclick="fetchWikipediaData(\'{js_name}\', \'{js_order}\', \'{element_id}\'); return false;" '
                            f'style="font-size: 16px;">{name}</a><br>'
                        )
                    else:
                        # A missing name can never match a df_names row, and the
                        # split rows carry no order column, so report it as unknown.
                        name_parts.append(
                            '<b>Name not found, but it is a member of the order \'unknown\'</b><br>'
                        )
                names_links = ''.join(name_parts)

                html = f'''
                    <div style="text-align: center;">
                        <div style="font-size: larger;">
                            <b>About</b> {max_ma_list} <b>Million Years Old</b>
                        </div>
                        <br>
                        <div style="font-size: smaller;">
                            {collection_no_links}
                        </div>
                        <div id="name-container-{idx}">
                            {names_links}
                        </div>
                        <div id="{element_id}" style="height: 200px; overflow-y: auto; margin-top: 10px;"></div>
                    </div>
                '''
                popup = folium.Popup(html, max_width='85%', min_width='60%')
                folium.Marker(
                    location=[row['lat'], row['lng']],
                    popup=popup,
                    icon=folium.Icon(color=color),  # Add color to the marker
                ).add_to(marker_cluster)

    # Add the Google tag / AdSense snippet to the map. The notebook defines it
    # as `gtag_script`; the previously referenced name `analytics` does not exist.
    element = Element(gtag_script)
    m.get_root().html.add_child(element)

    # Save the map to an HTML file
    cladenames = '_'.join(tclades + [str(mindate), str(maxdate)])
    fnamemap = '/content/drive/MyDrive/paleomapstuff/fossil_map' + cladenames + '.html'
    m.save(fnamemap)

    return m

In [None]:


# make maps - one at a time

# clades index reference:
#   0 dinosauria, 1 trilobita, 2 mammalia, 3 pterosauria, 4 ichthyosauria,
#   5 plesiosauria, 6 mosasaurus, 7 synapsida, 8 crocodylomorpha, 9 ammonites

# Named presets: (mindate, maxdate, clades to plot). Dates are in millions of years.
MAP_PRESETS = {
    # small trilobita slice for quick testing
    'trilobita_test': (440, 445, clades[1:2]),
    # dinosauria, pterosauria, ichthyosauria, plesiosauria, mosasaurus, crocodylomorpha
    'reptiles': (66, 1000, clades[0:1] + clades[3:7] + clades[8:9]),
    'trilobita': (66, 1000, clades[1:2]),
    'old_mammalia': (30, 1000, clades[2:3]),
    # Upper bound is 30, not 300, matching the "make all" cell and avoiding
    # overlap with 'old_mammalia'.
    'young_mammalia': (1, 30, clades[2:3]),
}

# control dates allowed: pick which preset to render
PRESET = 'trilobita_test'
mindate, maxdate, tclades = MAP_PRESETS[PRESET]

m = mk_map(df_split, tclades, mindate, maxdate, df_names)

# Display the map
m


In [None]:




# make all

# clades index reference:
#   0 dinosauria, 1 trilobita, 2 mammalia, 3 pterosauria, 4 ichthyosauria,
#   5 plesiosauria, 6 mosasaurus, 7 synapsida, 8 crocodylomorpha, 9 ammonites

# One entry per output map: (mindate, maxdate, clades to plot)
batch_jobs = [
    # dinosauria, pterosauria, ichthyosauria, plesiosauria, mosasaurus, crocodylomorpha
    (66, 1000, clades[0:1] + clades[3:7] + clades[8:9]),
    # trilobita
    (66, 1000, clades[1:2]),
    # 'old mammalia'
    (30, 1000, clades[2:3]),
    # 'young mammalia'
    (1, 30, clades[2:3]),
]

# Each call saves its own HTML file; `m` ends up holding the last map built.
for mindate, maxdate, tclades in batch_jobs:
    m = mk_map(df_split, tclades, mindate, maxdate, df_names)

# Display the map
m