# Agewise Colorado report

## 1. Load all necessary packages

In [1]:
import pandas as pd
import numpy as np
from fuzzywuzzy import process

## 2. Load all important tables from csv files originated from Google Analytics

### Alls

In [2]:
Alls=pd.read_csv('All page.csv')
Alls.head()

Unnamed: 0,Page,Page Title,Pageviews,Unique Pageviews,Avg. Time on Page,Entrances,Bounce Rate,% Exit,Page Value
0,/,Home - AgeWise Colorado,379,291,61.87,269,33.46%,32.72%,0
1,/participating-providers/,Provider Directory - AgeWise Colorado,193,155,112.82,46,52.17%,34.72%,0
2,/about/,About - AgeWise Colorado,112,86,133.68,13,53.85%,35.71%,0
3,/provider/nymbl-science/,Nymbl Science - AgeWise Colorado,106,97,155.12,21,71.43%,67.92%,0
4,/blog/formal-vs-informal-caregiving/,What is the Difference Between Formal and Info...,72,65,258.27,60,88.33%,84.72%,0


#### Page Title have duplicates, but can explain more things than Page, so we make a table "Title" just contain these two and will use it in the future for conversion use.

In [3]:
# Merge blog with blog titles
Title=Alls[['Page','Page Title']]
Title.head()
#blog=blog.merge(Title,on='Page',how='inner')

Unnamed: 0,Page,Page Title
0,/,Home - AgeWise Colorado
1,/participating-providers/,Provider Directory - AgeWise Colorado
2,/about/,About - AgeWise Colorado
3,/provider/nymbl-science/,Nymbl Science - AgeWise Colorado
4,/blog/formal-vs-informal-caregiving/,What is the Difference Between Formal and Info...


### In the Page Title column, all of the values are attached with _- AgeWise Colorado_, which is not helpful for later analysis and seems redundant. Therefore we remove it.

In [4]:
Alls['Page Title']=Alls['Page Title'].str.replace(' - AgeWise Colorado','')
Alls.head()

Unnamed: 0,Page,Page Title,Pageviews,Unique Pageviews,Avg. Time on Page,Entrances,Bounce Rate,% Exit,Page Value
0,/,Home,379,291,61.87,269,33.46%,32.72%,0
1,/participating-providers/,Provider Directory,193,155,112.82,46,52.17%,34.72%,0
2,/about/,About,112,86,133.68,13,53.85%,35.71%,0
3,/provider/nymbl-science/,Nymbl Science,106,97,155.12,21,71.43%,67.92%,0
4,/blog/formal-vs-informal-caregiving/,What is the Difference Between Formal and Info...,72,65,258.27,60,88.33%,84.72%,0


### Device

In [5]:
Device=pd.read_csv('Device.csv')
Device.head()

Unnamed: 0,Page,Source,Device Category,Landing Page,Social Source Referral,Pageviews,Sessions,Connect to Provider (Goal 1 Completions),Avg. Time on Page,Hits,Session Duration,Users
0,/,google,desktop,/,No,96,82,0,42.04,97,14541.0,61
1,/,(direct),desktop,/,No,92,72,0,72.96,92,13146.0,60
2,/participating-providers/,alittlehelp.org,desktop,/participating-providers/,No,48,32,0,143.93,62,8066.0,31
3,/blog/formal-vs-informal-caregiving/,google,desktop,/blog/formal-vs-informal-caregiving/,No,35,30,0,360.2,35,1881.0,30
4,/participating-providers/,google,desktop,/,No,29,0,0,176.53,37,0.0,20


### Source

In [6]:
Source=pd.read_csv('Source.csv')
Source.head()

Unnamed: 0,Page,Source,Pageviews,Sessions,Avg. Time on Page,Hits
0,/,google,134,103,44.83,137
1,/,(direct),125,90,80.08,125
2,/participating-providers/,google,69,5,128.98,86
3,/provider/nymbl-science/,m.facebook.com,61,1,185.0,97
4,/participating-providers/,alittlehelp.org,60,37,133.26,76


### Users

In [7]:
Users=pd.read_csv('Users.csv')
Users.head()

Unnamed: 0,Age,Gender,Avg. Time on Page,Users
0,18-24,male,166.21,23
1,65+,male,152.45,26
2,65+,female,147.55,102
3,45-54,male,115.67,23
4,45-54,female,99.89,29


### Load table: City and only select Pageviews in United States

In [8]:
City=pd.read_csv('city.csv')

In [9]:
City=City[City['Country']=='United States']
City.head()

Unnamed: 0,Page,City,Country,Pageviews,Sessions,Avg. Time on Page,Hits
0,/,Denver,United States,120,90,33.7,122
1,/participating-providers/,Denver,United States,48,8,130.53,54
2,/,(not set),United States,33,21,65.56,34
3,/about/,Denver,United States,30,3,228.24,30
4,/blog/category/webinars/,Denver,United States,30,0,23.22,30


## Website Hits

#### part of Page Title removed for the same reason as above

In [10]:
webhit=pd.read_csv('Website hits.csv')
webhit['Page Title']=webhit['Page Title'].str.replace(' - AgeWise Colorado','')
webhit.head()

Unnamed: 0,Page,Page Title,Hits,Avg. Time on Page,Pageviews,Connect to Provider (Goal 1 Abandoned Funnels),Connect to Provider (Goal 1 Abandonment Rate),Connect to Provider (Goal 1 Completions),Connect to Provider (Goal 1 Conversion Rate),Connect to Provider (Goal 1 Starts),Connect to Provider (Goal 1 Value)
0,/,Home,387,61.87,379,0,0.00%,0,0.00%,0,0.0
1,/participating-providers/,Provider Directory,239,112.82,193,0,0.00%,0,0.00%,0,0.0
2,/provider/nymbl-science/,Nymbl Science,155,155.12,106,0,0.00%,0,0.00%,0,0.0
3,/about/,About,113,133.68,112,0,0.00%,0,0.00%,0,0.0
4,/blog/formal-vs-informal-caregiving/,What is the Difference Between Formal and Info...,72,258.27,72,0,0.00%,0,0.00%,0,0.0


### Search

In [11]:
Search=pd.read_csv('Search term.csv')
Search.head()

Unnamed: 0,Search Term,Pageviews
0,caregiver support,4
1,connie ward,2
2,silver key,2
3,ace senior services,1
4,ageism,1


## Analysis

#### 1) Frequently Searched Term

__We find the unique Search Terms and check if there are terms with the same contents but minor changes(for example: "lifebio" and "life bio")__

In [12]:
unique_types=Search['Search Term'].unique()
unique_types

array(['caregiver support', 'connie ward', 'silver key',
       'ace senior services', 'ageism', 'bloom healthcare',
       'Care Managers', 'caregiving', 'cdhs', 'cleaning', 'cultivate',
       'fraud', 'home modifications', 'Hospice', 'human', 'insurance',
       'life bio', 'lifebio', 'moving', 'reverse mortgages', 'services',
       'silver bills', 'State Unit in Aging', 'state unit on aging'],
      dtype=object)

__From the list, we can see that lifebio/life bio and State Unit in Aging/state unit on aging are repetitative. The following code is to find these terms and correct them to what you want it to be.__

In [13]:
matches=process.extract('lifebio', unique_types, limit = len(unique_types))
for i in matches:
    if i[1] >= 80:
        Search.loc[Search['Search Term'] == i[0],'Search Term']='lifebio'

In [14]:
matches=process.extract('State Unit in Aging', unique_types, limit = len(unique_types))
for i in matches:
    if i[1] >= 80:
        Search.loc[Search['Search Term'] == i[0],'Search Term']='State Unit in Aging'

##### We sum Pageviews by grouping the Search Terms and make it a DataFrame that we can export later.
The reset_index() will make Search Index column an independent column, so it does not disappear after we export it.

In [15]:
Search=pd.DataFrame(Search.groupby('Search Term')['Pageviews'].sum())
Search=Search.reset_index()
Search['Search Term']=Search['Search Term'].str.lower()
Search=Search.sort_values(by='Pageviews',ascending=False)
Search.head()

Unnamed: 0,Search Term,Pageviews
6,caregiver support,4
21,silver key,2
2,state unit in aging,2
16,lifebio,2
10,connie ward,2


In [16]:
Search.to_csv('Search.csv',index=False)

### 2) Blog Post hits and Avg Time on Page- ranking 

Here we rank the Hits of each blog post

#### Filter out Blog pages from Page

In [17]:
# Filter out only blogs
blog=City[City['Page'].str.contains( pat = 'blog' )]
blog.head()

Unnamed: 0,Page,City,Country,Pageviews,Sessions,Avg. Time on Page,Hits
4,/blog/category/webinars/,Denver,United States,30,0,23.22,30
22,/blog/what-exercise-and-fitness-routines-make-...,Denver,United States,9,3,363.5,9
23,/blog/home-modifications-for-comfort-safety/,Denver,United States,8,0,25.0,8
29,/blog/falls-are-preventable-heres-how/,(not set),United States,7,7,0.0,7
30,/blog/service_area/denver-metro-area/page/4/,(not set),United States,7,2,29.0,7


a. Blog Avg Time on Page \
Here we inner join the "blog" table with "Title" on Page column. Then we sum the Avg time on Page by Page Title and display in descending order

In [18]:
blogs=blog.merge(Title,on='Page',how='inner')
blogs=pd.DataFrame(blogs.groupby('Page Title')['Avg. Time on Page'].sum().sort_values(ascending=False))

In [19]:
blogs=blogs.reset_index()
blogs.head()

Unnamed: 0,Page Title,Avg. Time on Page
0,Falls Are Preventable: Here's How - AgeWise Co...,39512.5
1,What Transportation Options Do Seniors Have If...,2350.25
2,Dementia and Alzheimer's: The Differences and ...,2062.0
3,Self-Care for Caregivers￼ - AgeWise Colorado,1810.5
4,Home Modifications for Seniors Aging in Place ...,1684.5


b.Blog Hits

similar code as above

In [20]:
hits=blog[['Page','Hits']]
hits=hits.merge(Title, on='Page',how='inner')
hits=pd.DataFrame(hits.groupby('Page Title')['Hits'].sum().sort_values(ascending=False)).reset_index()
hits.head()

Unnamed: 0,Page Title,Hits
0,Falls Are Preventable: Here's How - AgeWise Co...,1001
1,Webinars Archives - AgeWise Colorado,58
2,What Transportation Options Do Seniors Have If...,28
3,What Exercise and Fitness Routines Make Sense ...,26
4,What is the Difference Between Formal and Info...,25


In [21]:
hits.to_csv('hits.csv',index=False)

## Blog Post Misses

Google Analytics will not show Web Pages with 0 pageviews, therefore alternatively we select pages where guests spent 0 second in.

In [22]:
misses=pd.DataFrame(blogs.groupby(['Page Title'])['Avg. Time on Page'].sum().sort_values(ascending=False))
misses=misses[misses['Avg. Time on Page']==0]
misses=pd.DataFrame(misses.groupby(['Page Title'])['Avg. Time on Page'].sum())
misses

Unnamed: 0_level_0,Avg. Time on Page
Page Title,Unnamed: 1_level_1
Colorado Center for Aging 2022 Legislative Session Priorities - AgeWise Colorado,0.0
Combatting Loneliness and Social Isolation in Older Adults - AgeWise Colorado,0.0
Evaluating Your Sources of Retirement Income - AgeWise Colorado,0.0
Exploring Senior Housing Options for Your Loved One - AgeWise Colorado,0.0
Spotting and Avoiding Fraud and Scams Targeting Seniors - AgeWise Colorado,0.0
Vitamins for Seniors and Older Adults - AgeWise Colorado,0.0


In [23]:
misses.to_csv('misses.csv',index=False)

## Connecting to the provider site and Device  (device?)

The code is right. However, there are something wrong with the data (everything in the related column is 0) about Connect to Provider.

In [24]:
Devices=Device[['Device Category','Sessions','Connect to Provider (Goal 1 Completions)']]
Devices.head()

Unnamed: 0,Device Category,Sessions,Connect to Provider (Goal 1 Completions)
0,desktop,82,0
1,desktop,72,0
2,desktop,32,0
3,desktop,30,0
4,desktop,0,0


In [25]:
Devices=pd.DataFrame(Devices.groupby('Device Category')[['Sessions','Connect to Provider (Goal 1 Completions)']].sum())
Devices

Unnamed: 0_level_0,Sessions,Connect to Provider (Goal 1 Completions)
Device Category,Unnamed: 1_level_1,Unnamed: 2_level_1
desktop,683,0
mobile,834,0
tablet,355,0


Here we calculate the percentage of each Device used to access the web pages

In [26]:
Devices['Session Rate']=Devices['Sessions']/Devices['Sessions'].sum()

In [27]:
Devices

Unnamed: 0_level_0,Sessions,Connect to Provider (Goal 1 Completions),Session Rate
Device Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
desktop,683,0,0.36485
mobile,834,0,0.445513
tablet,355,0,0.189637


### Provider Misses

It is similar code as Blog misses. We use .str.contains(pat='TEXT') to find rows that contains provider.

In [28]:
provider=Alls[Alls['Page'].str.contains( pat = '/provider/' )]
provider=pd.DataFrame(provider.groupby('Page Title')['Avg. Time on Page'].sum().sort_values())
noprovider=provider[provider['Avg. Time on Page']==0]
noprovider

Unnamed: 0_level_0,Avg. Time on Page
Page Title,Unnamed: 1_level_1
​﻿​Ace Senior Services,0.0
CarePenguin,0.0
Coldwell Banker Realty - Sharon Wiest,0.0
Colorado Center for Aging,0.0
Columbine Health Systems Center for Healthy Aging,0.0
Vintage - Nutrition Programs,0.0
DRCOG's Area Agency on Aging,0.0
CU Anschutz Multidisciplinary Center on Aging,0.0
Douglas County Libraries,0.0
Prime of Life Tech,0.0


## Provider Hits

We find top hits pages

In [29]:
providerhit=webhit[webhit['Page'].str.contains( pat = '/provider/' )]
providerhit=providerhit[['Page','Page Title','Hits']]
providerhit.head()

Unnamed: 0,Page,Page Title,Hits
2,/provider/nymbl-science/,Nymbl Science,155
12,/provider/state-unit-on-aging/,State Unit on Aging,29
24,/provider/care-matters-always/,Care Matters Always,17
25,/provider/jefferson-county-council-on-aging-jc...,Jefferson County Council on Aging (JCCOA),17
27,/provider/aging-with-dignity-five-wishes/,Aging With Dignity | Five Wishes,16


In [30]:
providerhit=pd.DataFrame(providerhit.groupby('Page Title')['Hits'].sum().sort_values(ascending=False))
providerhit.head()

Unnamed: 0_level_0,Hits
Page Title,Unnamed: 1_level_1
Nymbl Science,155
State Unit on Aging,29
Care Matters Always,17
Jefferson County Council on Aging (JCCOA),17
"Transitions Senior Care, LLC",16


In [31]:
providerhit.to_csv('Provider Hits.csv')

## All providers (maybe)

Google Analytics does not provide pages that have 0 views. Therefore to find all providers, we need to select columns that allow value 0 in them. \
Here we choose Avg Time on Page

In [32]:
provider

Unnamed: 0_level_0,Avg. Time on Page
Page Title,Unnamed: 1_level_1
​﻿​Ace Senior Services,0.00
CarePenguin,0.00
Coldwell Banker Realty - Sharon Wiest,0.00
Colorado Center for Aging,0.00
Columbine Health Systems Center for Healthy Aging,0.00
...,...
DRCOG Area Agency on Aging Food & Nutrition Resources,564.00
RideNoCo,592.00
Compassion and Choices,609.25
A Little Help,1003.00


We discover which provider visitors spend more time on

In [33]:
a=webhit[['Page Title','Hits']]
provider=provider.merge(a,on='Page Title', how='inner')
provider=pd.DataFrame(provider.groupby('Page Title')[['Avg. Time on Page','Hits']].sum())
provider=provider.sort_values(by='Avg. Time on Page',ascending=False)
provider=provider.reset_index()
provider

Unnamed: 0,Page Title,Avg. Time on Page,Hits
0,SilverBills,3861.00,12
1,Compassion and Choices,1827.75,15
2,A Little Help,1003.00,4
3,Knoebel Institute for Healthy Aging,627.00,7
4,RideNoCo,592.00,1
...,...,...,...
85,Columbine Health Systems Center for Healthy Aging,0.00,3
86,GoldLeaf HomeHealth,0.00,2
87,ECAAA - Project SMILE Nutrition Program,0.00,1
88,Douglas County Libraries,0.00,4


In [34]:
provider.to_csv('provider.csv',index=False)

## Demographics of Users(cannot filter out US searches)

I tried to add Country into the column, but Google Analytics shows error, so the result contains all countries

In [35]:
# Gender
Sex=pd.DataFrame(Users.groupby('Gender')['Users'].sum())
Sex['%']=Sex['Users']/Sex['Users'].sum()
Sex

Unnamed: 0_level_0,Users,%
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
female,276,0.683168
male,128,0.316832


In [36]:
# Age
Age=pd.DataFrame(Users.groupby('Age')['Users'].sum())
Age['%']=Age['Users']/Age['Users'].sum()
Age

Unnamed: 0_level_0,Users,%
Age,Unnamed: 1_level_1,Unnamed: 2_level_1
18-24,54,0.133663
25-34,46,0.113861
35-44,48,0.118812
45-54,52,0.128713
55-64,76,0.188119
65+,128,0.316832


## Search Terms

In [37]:
Search.head()

Unnamed: 0,Search Term,Pageviews
6,caregiver support,4
21,silver key,2
2,state unit in aging,2
16,lifebio,2
10,connie ward,2


## Referrals (not filtered by country)

Instead of Yes/No, I change them to 1/0 for analysis

In [38]:
Referral=Device[['Source','Social Source Referral']]
Referral['Social Source Referral']=Referral['Social Source Referral'].replace({'No':0,'Yes':1})
Referral.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Referral['Social Source Referral']=Referral['Social Source Referral'].replace({'No':0,'Yes':1})


Unnamed: 0,Source,Social Source Referral
0,google,0
1,(direct),0
2,alittlehelp.org,0
3,google,0
4,google,0


Here we sum to see the the amount of referral on Social Media

In [39]:
Referral['Source']=Referral['Source'].replace(['m.facebook.com','l.facebook.com','lm.facebook.com','facebook.com'],"facebook")
Referral['Source']=Referral['Source'].replace(['mail.google.com ','analytics.google.com '],"google")
Referral=pd.DataFrame(Referral.groupby('Source')['Social Source Referral'].sum().sort_values(ascending=False))
Referral.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Referral['Source']=Referral['Source'].replace(['m.facebook.com','l.facebook.com','lm.facebook.com','facebook.com'],"facebook")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Referral['Source']=Referral['Source'].replace(['mail.google.com ','analytics.google.com '],"google")


Unnamed: 0_level_0,Social Source Referral
Source,Unnamed: 1_level_1
facebook,1169
yahoo,0
url-opener.com,0
agingresourcesdougco.org,0
alittlehelp.org,0


### Landing Page

We can see which page result in the most landing pages. Here it is the Home Page

In [55]:
Landing=pd.DataFrame(Device['Landing Page'].value_counts())
Landing=Landing.reset_index()
Landing=Landing.rename(columns={'index':'Page'})
Landing=Landing.merge(Title, on='Page', how='inner')
Landing=Landing[['Page','Page Title','Landing Page']]
Landing.head()

Unnamed: 0,Page,Page Title,Landing Page
0,/,Home - AgeWise Colorado,311
1,/participating-providers/,Provider Directory - AgeWise Colorado,74
2,/participating-providers/,​﻿​Provider Directory - AgeWise Colorado,74
3,/about/,About - AgeWise Colorado,47
4,/community-resources/,Aging Advocacy and Resources - AgeWise Colorado,28


## Source

Here it talks about the sources which visitors click to enter our web page. Source like facebook varies from mobile, messanger and laptop version. What we need to do is merge them all under "Facebook". I did the same thing for Google

In [41]:
Source

Unnamed: 0,Page,Source,Pageviews,Sessions,Avg. Time on Page,Hits
0,/,google,134,103,44.83,137
1,/,(direct),125,90,80.08,125
2,/participating-providers/,google,69,5,128.98,86
3,/provider/nymbl-science/,m.facebook.com,61,1,185.00,97
4,/participating-providers/,alittlehelp.org,60,37,133.26,76
...,...,...,...,...,...,...
1679,/participating-providers/?_service_area=northw...,alittlehelp.org,0,0,0.00,1
1680,/participating-providers/?_services_in_home_ca...,(direct),0,0,0.00,1
1681,/participating-providers/?_services_in_home_ca...,duckduckgo,0,0,0.00,1
1682,/participating-providers/?_services_in_home_ca...,(direct),0,0,0.00,1


In [42]:
Source['Source'].value_counts()

m.facebook.com                        728
google                                162
l.facebook.com                        152
(direct)                              131
hs_email                               92
facebook                               83
alittlehelp.org                        81
lm.facebook.com                        74
bing                                   50
duckduckgo                             24
yahoo                                  15
cultivate.ngo                          15
jeffcolibrary.bibliocommons.com        14
app.asana.com                          11
mylivingobituary.com                    9
srcaging.org                            7
mail.google.com                         4
lens.google.com                         3
coloradononprofits.org                  3
search.aol.com                          3
trustedtransitionsofco.com              3
facebook.com                            2
agingresourcesdougco.org                2
startpage.com                     

In [43]:
Source['Source']=Source['Source'].replace(['m.facebook.com','l.facebook.com','lm.facebook.com','facebook.com'],"facebook")
Source['Source']=Source['Source'].replace(['mail.google.com','lens.google.com','analytics.google.com'],"google")
pd.DataFrame(Source['Source'].value_counts()).head()

Unnamed: 0,Source
facebook,1039
google,171
(direct),131
hs_email,92
alittlehelp.org,81


## blogs:Only in Colorado

The company focuses on US visitors. Here we specify CO visitors and non-CO visitors

In [44]:
colorado=pd.read_csv('Colorado Cities.csv')
colorado.head()

Unnamed: 0,Rank,City,Population
0,1,Denver,715878
1,2,Colorado Springs,471686
2,3,Aurora,379434
3,4,Fort Collins,166069
4,5,Lakewood,155733


In [45]:
city=pd.DataFrame(blog.groupby('City')[['Hits','Pageviews']].sum().sort_values(by='Hits', ascending=False))
city

Unnamed: 0_level_0,Hits,Pageviews
City,Unnamed: 1_level_1,Unnamed: 2_level_1
Denver,288,284
(not set),126,124
Colorado Springs,105,104
Aurora,67,65
Fort Collins,55,44
...,...,...
Camdenton,1,1
Carbondale,1,1
Loganville,1,1
Castle Pines,1,1


In [46]:
USCity=city.merge(colorado, on='City',how='left')
co=USCity[USCity['Population']>0]
co=co[['City','Hits','Pageviews']]
co

Unnamed: 0,City,Hits,Pageviews
0,Denver,288,284
2,Colorado Springs,105,104
3,Aurora,67,65
4,Fort Collins,55,44
5,Grand Junction,40,40
...,...,...,...
174,Minturn,1,1
176,Meridian,1,1
179,Lyons,1,1
182,Carbondale,1,1


In [47]:
co.to_csv('co.csv',index=False)

## blogs:Non-Colorado US cities

In [48]:
USCity.head()
noco=USCity[USCity['Population'].isnull()]
noco

Unnamed: 0,City,Hits,Pageviews,Rank,Population
1,(not set),126,124,,
18,Chicago,14,14,,
23,Dallas,12,12,,
24,Bexley,12,12,,
25,San Antonio,11,11,,
...,...,...,...,...,...
178,Macon,1,1,,
180,Butte,1,1,,
181,Camdenton,1,1,,
183,Loganville,1,1,,


In [49]:
noco.to_csv('noco.csv',index=False)

## Hits on donate button

We find hits on the donate page. Almost all of the following are the same page, but varies due to the source

In [50]:
donate=webhit[webhit['Page'].str.contains( pat = '/donate/' )]
donate=donate[['Page','Page Title','Hits']]
donate

Unnamed: 0,Page,Page Title,Hits
104,/donate/,Donate,4
105,/donate/#FUNANWRZLDC?form=FUNANWRZLDC,Donate,4
1224,/donate/?fbclid=IwAR2kX-qOQn5S1R_pTqwBwn5xy032...,Donate,1
1225,/donate/#FUNANWRZLDC/thankyou?form=FUNANWRZLDC,Donate,1


In [51]:
donate.to_csv('Donate.csv')

## Volunteer hits

We can find the hits for Volunteer page

In [52]:
volunteer=webhit[webhit['Page'].str.contains( pat = '/volunteer/' )]
volunteer=pd.DataFrame(volunteer.groupby('Page Title')['Hits'].sum())
volunteer

Unnamed: 0_level_0,Hits
Page Title,Unnamed: 1_level_1
Volunteer,10


## About

We find information about "About" and "Get_ involved" page

In [53]:
about=webhit[webhit['Page'].str.contains( pat = '/about/' )]
about[about['Page Title']=='About']

Unnamed: 0,Page,Page Title,Hits,Avg. Time on Page,Pageviews,Connect to Provider (Goal 1 Abandoned Funnels),Connect to Provider (Goal 1 Abandonment Rate),Connect to Provider (Goal 1 Completions),Connect to Provider (Goal 1 Conversion Rate),Connect to Provider (Goal 1 Starts),Connect to Provider (Goal 1 Value)
3,/about/,About,113,133.68,112,0,0.00%,0,0.00%,0,0.0


In [54]:
involved=webhit[webhit['Page'].str.contains( pat = '/get-involved/' )]
involved

Unnamed: 0,Page,Page Title,Hits,Avg. Time on Page,Pageviews,Connect to Provider (Goal 1 Abandoned Funnels),Connect to Provider (Goal 1 Abandonment Rate),Connect to Provider (Goal 1 Completions),Connect to Provider (Goal 1 Conversion Rate),Connect to Provider (Goal 1 Starts),Connect to Provider (Goal 1 Value)
6,/get-involved/,Get Involved,53,42.29,53,0,0.00%,0,0.00%,0,0.0
