# Charity Commission register

## Register of merged charities

### Intro

The data can be found at <https://www.gov.uk/government/publications/register-of-merged-charities>. This notebook uses the March 2024 extract (`mergers_register_march_2024.csv`).

### Imports

In [108]:
import altair as alt

import pandas as pd
import seaborn as sns
from ydata_profiling import ProfileReport

### Cleaning

In [109]:
# Load the register of merged charities; the file is decoded as Windows-1252 (cp1252).
df = pd.read_csv(
    '../data/mergers_register_march_2024.csv',
    encoding='cp1252',
)

In [110]:
# Show the first rows of the freshly loaded frame as a quick sanity check.
df.head()

Unnamed: 0,Name of transferring charity (transferor) and charity number (if any),Name of receiving charity (transferee) and charity number (if any),Date Vesting Declaration made,Date property transferred,Date merger registered
0,HENRY DREW THOMAS ALMSHOUSE (202453),HENRY DREW THOMAS ALMSHOUSES AND STARCROSS WEL...,,01/03/2023,26/03/2024
1,STARCROSS WELFARE TRUST (205101),HENRY DREW THOMAS ALMSHOUSES AND STARCROSS WEL...,,01/03/2023,26/03/2024
2,ROYAL WELCH FUSILIERS REGIMENTAL MUSEUM TRUST ...,ROYAL WELCH FUSILIERS REGIMENTAL MUSEUM TRUST ...,,31/10/2023,26/03/2024
3,THE ROYAL WELCH FUSILIERS REGIMENTAL COLLECTIO...,ROYAL WELCH FUSILIERS REGIMENTAL MUSEUM TRUST ...,,31/10/2023,26/03/2024
4,AL-HIJRAH TRUST (1018850),AL-HIJRAH TRUST (1154046),09/11/2022,16/02/2014,25/03/2024


In [111]:
# List each column's dtype before any cleaning is applied.
df.dtypes

Name of transferring charity (transferor) and charity number (if any)    object
Name of receiving charity (transferee) and charity number (if any)       object
Date Vesting Declaration made                                            object
Date property transferred                                                object
Date merger registered                                                   object
dtype: object

In [112]:
# Swap the long register headings for short snake_case names.
# The assignment is positional, so the list order must match the frame's column order.
df.columns = [
    'transferor',
    'transferee',
    'date_vesting',
    'date_transferred',
    'date_registered',
]

In [118]:
# Force both charity-name columns to plain strings and trim surrounding whitespace.
# Note: str() is applied to every cell, so a missing value becomes the literal text 'nan'.
for name_col in ('transferor', 'transferee'):
    df[name_col] = df[name_col].map(lambda value: str(value).strip())

In [119]:
# Parse the three date columns from day/month/year text into datetimes.
date_cols = ['date_vesting', 'date_transferred', 'date_registered']

# DataFrame.apply forwards the `format` keyword to pd.to_datetime for each column.
df[date_cols] = df[date_cols].apply(pd.to_datetime, format='%d/%m/%Y')

# Display the first rows to confirm the conversion.
df.head()

Unnamed: 0,transferor,transferee,date_vesting,date_transferred,date_registered
0,HENRY DREW THOMAS ALMSHOUSE (202453),HENRY DREW THOMAS ALMSHOUSES AND STARCROSS WEL...,NaT,2023-03-01,2024-03-26
1,STARCROSS WELFARE TRUST (205101),HENRY DREW THOMAS ALMSHOUSES AND STARCROSS WEL...,NaT,2023-03-01,2024-03-26
2,ROYAL WELCH FUSILIERS REGIMENTAL MUSEUM TRUST ...,ROYAL WELCH FUSILIERS REGIMENTAL MUSEUM TRUST ...,NaT,2023-10-31,2024-03-26
3,THE ROYAL WELCH FUSILIERS REGIMENTAL COLLECTIO...,ROYAL WELCH FUSILIERS REGIMENTAL MUSEUM TRUST ...,NaT,2023-10-31,2024-03-26
4,AL-HIJRAH TRUST (1018850),AL-HIJRAH TRUST (1154046),2022-11-09,2014-02-16,2024-03-25


In [120]:
# Look at the last 15 characters of the first 50 transferor names.
df['transferor'].iloc[:50].str[-15:]

0     SHOUSE (202453)
1      TRUST (205101)
2      TRUST (211019)
3     TION) (1057884)
4     TRUST (1018850)
5     ESCUE (1157836)
6     HAGLEY (509524)
7     G ROOM (201868)
8      HALL (1005430)
9     NGLISH (313157)
10    OCIETY (284005)
11    IRLS (306983-7)
12    HOOL (306983-8)
13    HOOL (306983-3)
14    IONAL (1085806)
15     (unregistered)
16    ATION (1156230)
17    GREEN (1153816)
18    Y LTD (1117963)
19    IATION (517690)
20     (unregistered)
21    WTOWN) (230943)
22    IATION (510122)
23     (Unregistered)
24     TRUST (522687)
25     PARK (1090710)
26     (unregistered)
27     FUND (1031481)
28    REETS (1099006)
29    ARITY (1071597)
30    HARITY (206283)
31    E HALL (501900)
32    STOKE (1151394)
33    E HALL (288669)
34    ID UK (1169724)
35    ESCUE (1046861)
36    E HALL (224795)
37     (Unregistered)
38    TRUST (1092940)
39     (Unregistered)
40    DATION (272389)
41    H LEP (1192144)
42    NTREE (1145663)
43     TRUST (292043)
44    TRUST (1187479)
45    GROU

In [133]:
# create charity number cols
def extract_charity_number(names: pd.Series) -> pd.Series:
    """Extract a charity number from each charity-name string.

    Takes the first run of five or more characters drawn from digits and the
    separators ``-``, ``.`` and ``/`` (so sub-numbered entries such as
    ``306983-7`` are captured whole), then rewrites every separator as ``-``.

    Parameters
    ----------
    names : pd.Series
        String column holding charity names with the number embedded.

    Returns
    -------
    pd.Series
        The extracted number as a string, or NaN where nothing matched
        (for example names marked "(unregistered)").
    """
    # Raw string: "\d", "\-", "\." and "\/" are invalid escapes in a normal literal.
    # expand=False returns a Series rather than a one-column DataFrame.
    numbers = names.str.extract(r'([\d\-\.\/]{5,})', expand=False)
    # regex=True is required here: from pandas 2.0 the default is regex=False,
    # which treats the pattern as literal text and makes this replace a no-op.
    return numbers.str.replace(r'[\-\.\/]', '-', regex=True)


df['transferor_number'] = extract_charity_number(df['transferor'])
df['transferee_number'] = extract_charity_number(df['transferee'])

# Show a slice that includes sub-numbered and unregistered entries.
df.iloc[10:20]

Unnamed: 0,transferor,transferee,date_vesting,date_transferred,date_registered,transferor_number,transferee_number
10,THE STRAWBERRY LINE SOCIETY (284005),THE STRAWBERRY LINE SOCIETY (1201543),NaT,2023-09-17,2024-03-20,284005,1201543
11,THE ADA SCHOLARSHIP IN CONNEXION WITH OXFORD H...,GDST PRIZES & SCHOLARSHIPS FUND(306983-9),NaT,1995-05-17,2024-03-19,306983-7,306983-9
12,THE FRANCIS STRONG AWARDS FUND ICW NEWCASTLE C...,GDST PRIZES & SCHOLARSHIPS FUND(306983-9),NaT,1995-05-17,2024-03-19,306983-8,306983-9
13,THE MISS MABEL E LEWIS GIFT TO WIMBLEDON HIGH ...,GDST PRIZES & SCHOLARSHIPS FUND(306983-9),NaT,1995-05-17,2024-03-19,306983-3,306983-9
14,LIVING WORD CHRISTIAN FELLOWSHIP INTERNATIONAL...,LIVING WORD CHRISTIAN FELLOWSHIP (1202201),NaT,2023-11-01,2024-03-19,1085806,1202201
15,SUDBOURNE AND TUNSTALL BAPTIST CHURCH (unregis...,GRACE CHURCH RENDLESHAM (1202945),2023-10-31,2023-10-31,2024-03-18,,1202945
16,GO RUN FOR FUN FOUNDATION (1156230),THE DAILY MILE FOUNDATION (1166911),NaT,2024-03-15,2024-03-15,1156230,1166911
17,THE MUSLIM ASSOCIATION OF HALL GREEN (1153816),ARRAHMA FOUNDATION (1195363),2023-08-01,2023-08-01,2024-03-14,1153816,1195363
18,DEDDINGTON VILLAGE NURSERY LTD (1117963),DEDDINGTON PARTNERSHIP FOUNDATION STAGE UNIT (...,2023-03-06,2023-10-15,2024-03-14,1117963,1100275
19,KENTON PARK COMMUNITY ASSOCIATION (517690),KENTON PARK COMMUNITY ASSOCIATION (1204136),2023-10-10,2023-10-10,2024-03-14,517690,1204136


### Number of mergers over time

#### Most frequent transferors/transferees

In [124]:
# Count how many rows each extracted transferor number appears in.
df['transferor_number'].value_counts()

transferor_number
1189059    5
1059082    2
1081081    2
1147851    2
266630     2
          ..
1066789    1
1066642    1
1066322    1
1067503    1
216355     1
Name: count, Length: 4531, dtype: int64

In [87]:
# Count transferors by the leading run of 5+ digits only, so a sub-numbered
# entry such as "306983-7" is counted under "306983".
# Raw string avoids the invalid "\d" escape; expand=False makes str.extract
# return a Series, so value_counts() is the Series method rather than
# DataFrame.value_counts().
df['transferor_number'].str.extract(r'(\d{5,})', expand=False).value_counts()

1053467    43
1125485    35
210203      9
226168      8
210202      8
           ..
1067166     1
1067168     1
1067173     1
1067176     1
900539      1
Name: count, Length: 4387, dtype: int64