# UV RADIATION EXPOSURE AND MELANOMA INCIDENCE 

Combining environmental UV exposure data with cancer incidence statistics to examine melanoma risk across U.S. counties.

In [85]:
import openpyxl
import pandas as pd
import matplotlib as plt
import requests

### Importing datasets

- `uv_county` includes historical Average Daily Global Solar Radiation estimates (AVGLO) - a proxy measure for UV- in Wh/m² by county in the Continental US for the period 1961-1990 and the more recent 5-year average measures (2020 - 2024);

- `melanoma_county`  to come...

### Investigate sheet names as shown <a href="https://stackoverflow.com/questions/17977540/pandas-looking-up-the-list-of-sheets-in-an-excel-file" target="_blank">here</a>

In [86]:
uv_exposure = pd.ExcelFile('../datasets/uv-county-exposure.xlsx')
uv_exposure.sheet_names


['UV_County_1961-1990', 'UV_County_2000-2024']

In [87]:
recent_uv = pd.read_excel('../datasets/uv-county-exposure.xlsx', sheet_name='UV_County_2000-2024', dtype={'COUNTY_FIPS': str})
recent_uv.rename({'COUNTY NAME': 'COUNTY_NAME', 'STATENAME': 'STATE_NAME'}, axis=1, inplace=True)
recent_uv['COUNTY_FIPS'] = recent_uv['COUNTY_FIPS'].str.zfill(5)
recent_uv = recent_uv.sort_values(['STATE_NAME', 'COUNTY_NAME'])
recent_uv


Unnamed: 0,STATE_NAME,COUNTY_NAME,COUNTY_FIPS,UV_ Wh/m² (2000-2004),UV_ Wh/m² (2005-2009),UV_ Wh/m² (2010-2014),UV_ Wh/m² (2015-2019),UV_ Wh/m² (2020_2024)
2734,Alabama,Autauga,01001,4781.877818,4774.090182,4843.185939,4701.004606,4785.906061
2159,Alabama,Baldwin,01003,4916.409224,4904.892424,4934.852894,4764.348800,4814.548819
565,Alabama,Barbour,01005,4875.885667,4862.169667,4908.160333,4786.408667,4833.696667
974,Alabama,Bibb,01007,4727.518560,4706.299680,4785.613440,4625.073600,4726.646400
2232,Alabama,Blount,01009,4643.034462,4606.273385,4687.476000,4574.497846,4665.236538
...,...,...,...,...,...,...,...,...
1527,Wyoming,Sweetwater,56037,4780.547753,4819.313629,4731.694674,4753.577473,4910.167898
1517,Wyoming,Teton,56039,4294.326031,4282.941613,4162.277026,4303.009508,4529.784895
2234,Wyoming,Uinta,56041,4730.583808,4797.226752,4687.229184,4736.682624,4936.341280
1990,Wyoming,Washakie,56043,4558.186982,4545.855054,4427.304308,4473.671815,4565.537275


In [88]:
recent_uv.isna().sum() * 100/len(recent_uv)

# recent_uv['COUNTY_FIPS'].unique()

STATE_NAME               0.0
COUNTY_NAME              0.0
COUNTY_FIPS              0.0
UV_ Wh/m² (2000-2004)    0.0
UV_ Wh/m² (2005-2009)    0.0
UV_ Wh/m² (2010-2014)    0.0
UV_ Wh/m² (2015-2019)    0.0
UV_ Wh/m² (2020_2024)    0.0
dtype: float64

In [89]:
melanoma_incidence = pd.read_csv('../datasets/melanoma-county-incidence.csv', dtype={'FIPS': str}, skiprows=8, skipfooter=35, engine='python')
melanoma_incidence.columns
melanoma_incidence.drop(columns=['2023 Rural-Urban Continuum Codes([rural urban note])', 
                                 'Lower 95% Confidence Interval', 
                                 'Upper 95% Confidence Interval', 
                                 'CI*Rank([rank note])', 
                                 'Lower CI (CI*Rank)', 
                                 'Upper CI (CI*Rank)', 
                                 'Recent 5-Year Trend ([trend note]) in Incidence Rates', 
                                 'Lower 95% Confidence Interval.1', 
                                 'Upper 95% Confidence Interval.1'], inplace=True)
melanoma_incidence.rename({'County': 'COUNTY_NAME', 
                           'FIPS': 'COUNTY_FIPS', 
                           'Age-Adjusted Incidence Rate([rate note]) - cases per 100,000': 'AGE_RATE_PER100K', 
                           'Average Annual Count': 'AVG_ANNUAL_COUNT', 
                           'Recent Trend': 'RECENT_TREND'}, axis=1, inplace=True)
melanoma_incidence['COUNTY_FIPS'] = melanoma_incidence['COUNTY_FIPS'].str.zfill(5)
melanoma_incidence = melanoma_incidence[melanoma_incidence['COUNTY_FIPS'] != '00000'] # drops the annual agreggate row that has COUNTY_FIPS = '00000'
melanoma_incidence = melanoma_incidence.sort_values(['COUNTY_FIPS'])
melanoma_incidence




Unnamed: 0,COUNTY_NAME,COUNTY_FIPS,AGE_RATE_PER100K,AVG_ANNUAL_COUNT,RECENT_TREND
1096,"Autauga County, Alabama(6)",01001,24,16,stable
217,"Baldwin County, Alabama(6)",01003,37.2,115,stable
1209,"Barbour County, Alabama(6)",01005,22.8,8,*
1909,"Bibb County, Alabama(6)",01007,15.5,4,*
1569,"Blount County, Alabama(6)",01009,19.4,14,falling
...,...,...,...,...,...
292,"Teton County, Wyoming(6)",56039,35.2,10,stable
1836,"Uinta County, Wyoming(6)",56041,16.4,4,falling
3090,"Washakie County, Wyoming(6)",56043,*,3 or fewer,*
3108,"Weston County, Wyoming(6)",56045,*,3 or fewer,*


In [90]:
melanoma_incidence.isna().sum() * 100/len(melanoma_incidence)

COUNTY_NAME         0.0
COUNTY_FIPS         0.0
AGE_RATE_PER100K    0.0
AVG_ANNUAL_COUNT    0.0
RECENT_TREND        0.0
dtype: float64