In [75]:
import pandas as pd
import numpy as np

In [76]:
# Load the table this notebook works on from its CSV file.
source_csv = 'ao3_lockwood_and_co_ao_21042023_1857.csv'
working_df = pd.read_csv(source_csv)

In [77]:
# List the column labels of the loaded table.
working_df.columns

Index(['link', 'title', 'author', 'published', 'updatedate', 'chapters',
       'language', 'words', 'kudos', 'comments', 'bookmarks', 'hits',
       'rating', 'chapter', 'chapter_max', 'completion', 'currentdate',
       'datediff', 'classification'],
      dtype='object')

Convert the `published` and `updatedate` columns to datetime.

In [78]:
# Parse both date columns into pandas datetimes so they can be compared and subtracted.
for date_col in ('published', 'updatedate'):
    working_df[date_col] = pd.to_datetime(working_df[date_col])

In [79]:
# Reference date for the "days since" calculations: the most recent
# 'updatedate' found in the data, broadcast to every row.
# Series.max() skips missing dates (NaT). The builtin max() compares element by
# element, and because every comparison against NaT is False its result depends
# on row order and can end up being NaT.
working_df['currentdate'] = working_df['updatedate'].max()
working_df['currentdate']

0      2023-04-21
1      2023-04-21
2      2023-04-21
3      2023-04-21
4      2023-04-21
          ...    
1318   2023-04-21
1319   2023-04-21
1320   2023-04-21
1321   2023-04-21
1322   2023-04-21
Name: currentdate, Length: 1323, dtype: datetime64[ns]

In [80]:
# Days between the reference date and each row's last update, as a float.
one_day = pd.Timedelta(days=1)
elapsed_since_update = working_df['currentdate'] - working_df['updatedate']
working_df['datediff'] = elapsed_since_update / one_day
working_df['datediff']

0          0.0
1          0.0
2          0.0
3          0.0
4          0.0
         ...  
1318    2934.0
1319    3062.0
1320    3334.0
1321    3362.0
1322    3413.0
Name: datediff, Length: 1323, dtype: float64

In [81]:
def _classify_work(row):
    """Return the classification label for a single row of working_df."""
    # chapter_max is compared against the string '1', not the integer 1.
    if row['chapter_max'] == '1':
        return 'oneshot'
    if row['completion'] == 'completed':
        return 'multichapter(complete)'
    # Remaining rows are unfinished multichapter works: a datediff of at most
    # 60 counts as still updating, anything else as dormant.
    if row['datediff'] <= 60:
        return 'multichapter(updating)'
    return 'multichapter(dormant)'


working_df['classification'] = working_df.apply(_classify_work, axis=1)
working_df['classification']

0       multichapter(updating)
1       multichapter(updating)
2                      oneshot
3       multichapter(updating)
4                      oneshot
                 ...          
1318                   oneshot
1319    multichapter(complete)
1320    multichapter(complete)
1321                   oneshot
1322                   oneshot
Name: classification, Length: 1323, dtype: object

In [82]:
# Show the distinct values of the 'rating' column in sorted order.
np.sort(working_df['rating'].unique())

array(['Explicit', 'General Audiences', 'Mature', 'Not Rated',
       'Teen And Up Audiences'], dtype=object)

In [83]:
def get_unique_item(column):
    """Count how often each item occurs in a column of stringified lists.

    Each row is treated as plain text: square brackets, single quotes and
    double quotes are removed, the remainder is split on commas, and every
    piece is whitespace-stripped before being counted. Consequences of this
    text-based parsing:

    * an item that itself contains a comma is counted as several items;
    * straight apostrophes/quotes inside an item are dropped;
    * a row that is empty after cleaning (e.g. ``"[]"``) contributes one
      empty-string item.

    Rows that are not strings (for example the NaN pandas produces for an
    empty CSV cell, or None) are skipped instead of raising AttributeError.

    Every ``item count`` pair is printed, one per line, in sorted item order.

    Parameters
    ----------
    column : iterable
        The rows to scan, e.g. a pandas Series of strings.

    Returns
    -------
    dict
        Mapping of item -> number of occurrences, with keys inserted in
        sorted (Python string ordering) order.
    """
    from collections import Counter  # local import keeps this cell self-contained

    # Deletes the characters [ ] ' " in a single pass.
    strip_list_syntax = str.maketrans('', '', '[]\'"')

    counts = Counter()
    for row in column:
        if not isinstance(row, str):
            # Missing values cannot be parsed; ignore them.
            continue
        pieces = row.translate(strip_list_syntax).split(",")
        counts.update(piece.strip() for piece in pieces)

    sorted_item = {key: counts[key] for key in sorted(counts)}
    for key, value in sorted_item.items():
        print(key, value)
    return sorted_item

In [84]:
# Count occurrences of each entry in the 'characters' column; the call also
# prints every entry with its count.
character=get_unique_item(working_df['characters'])

 46
(Briefly) George Cubbins 1
(Briefly) Holly Munro 1
(Briefly) Quill Kipps 1
(Cameo) 1
(Mentioned) Bubs 1
(Mentioned) Celia Lockwood 1
(Mentioned) Donald Lockwood 1
(Mentioned) Hera Syndulla 1
(Mentioned) Holly Munro 1
(Mentioned) Jessica Lockwood 1
(also just mentioned) 1
(also mentioned) 1
(barely) - Character 1
(briefly) 1
(implied) - Character 1
(mentioned) 3
(the last three are only mentioned) 1
35 Portland Row - Character 2
Aaron Hotchner 1
Aaron Minyard 1
Abigail Hobbs 1
Adelaide Winkman 3
Adora (She-Ra) 1
Adult Lockwood 1
Adult Lucy - Character 1
Aerith Gainsborough 1
Aikemere 1
Albert Browne 1
Albert Joplin 3
Albert Joplin | Pamela Joplin 5
Albus Dumbledore 1
Alcatraz Smedry 1
Alcina Dimitrescu 1
Alcina Dimitrescus Daughters 1
Alec Lightwood 2
Aleksander of Hohenburg 1
Alfie Morgan 2
Alfie Morgan (mentioned) 1
Alfred Pennyworth 1
Allan-a-Dale 1
Alma Peregrine 1
Amina El Maghrabi 1
Andrew Minyard 1
Angie (Resident Evil) 1
Annabel Ward 5
Annabelle Ward 1
Annabeth Chase (Percy 

In [85]:
# Count occurrences of each entry in the 'relationship' column; the call also
# prints every entry with its count.
relationship=get_unique_item(working_df['relationship'])

 113
(Background) Lucy Carlyle/Anthony Lockwood 2
(Briefly) Lucy Carlyle & Holly Munro 1
(IMPLIED) 1
(suggested) Bartimaeus/Ptolemy 1
Adora/Catra (She-Ra) 1
Aerith Gainsborough/Tifa Lockhart 1
Albert Browne/Scarlett McCain 1
Alcina Dimitrescu/Original Female Character(s) 1
Alcina Dimitrescu/Reader 1
Aleksander of Hohenberg/Deryn Sharp 1
Amina El Maghrabi/Daisy Wells 1
Annabel Ward/John Fairfax 1
Annabeth Chase/Percy Jackson 2
Anthony Bridgerton & Francesca Bridgerton 1
Anthony Bridgerton/Kate Sharma (mentioned) 1
Anthony Bridgerton/Kate Sheffield | Kate Sharma 5
Anthony Lockwood & Albert Brown 1
Anthony Lockwood & George Cubbins 2
Anthony Lockwood & George Karim 2
Anthony Lockwood & Holly Munro 21
Anthony Lockwood & Inspector Barnes 2
Anthony Lockwood & Jessica Lockwood 17
Anthony Lockwood & Julius Winkman 1
Anthony Lockwood & Lucy Carlyle 1
Anthony Lockwood & Lucy Carlyle & George Cubbins 1
Anthony Lockwood & Lucy Carlyle & George Karim | George Cubbins 1
Anthony Lockwood & Nigel “Gra

In [86]:
# Count occurrences of each entry in the 'tags' column; the call also prints
# every entry with its count.
tags=get_unique_item(working_df['tags'])

 37
#LetGeorgeSayFuck2023 1
(AFFECTIONATE) 1
(EVENTUALLY) resolved sexual tension 1
(LET ME FULFILL MY FANTASIES OKAY) 1
(Lucy Carlyle being Lockwoods favourite) 1
(Or is it?) 1
(Theyre all in their early 20s & Kipps is mid-20s) 1
(a teensy bit) 1
(and unsure how to tag this) 1
(as expected) 1
(at least in my canon) 1
(because Lockwood needs the comfort more than Lucy does) 1
(because of course I couldnt just stop at 5) 1
(but in a lighthearted way) 1
(dont shoot the messenger) 1
(eventually) - Freeform 1
(for reproductive purposes) 1
(he’s going to Area 51 don’t tell Lucy) 1
(how was that not already a tag) 1
(if you dont count the skull) 1
(if you squint really hard) 1
(if youve ever thought that to yourself then youre in the right place) 1
(implied) - Freeform 1
(in the loosest possible definition) 1
(just a lil bit) 1
(looks at major character death tag in a hunger games au) 1
(loosely based regency im not doing enough research for accuracy) 1
(maybe? Not sure about what time perio

In [87]:
# Re-order the tag counts from least to most frequent. The sort is stable, so
# tags with equal counts keep the order they have in `tags`.
tags_by_count = sorted(tags, key=tags.get)
tags_values = {tag: tags[tag] for tag in tags_by_count}
for tag, count in tags_values.items():
    print(tag, count)

#LetGeorgeSayFuck2023 1
(AFFECTIONATE) 1
(EVENTUALLY) resolved sexual tension 1
(LET ME FULFILL MY FANTASIES OKAY) 1
(Lucy Carlyle being Lockwoods favourite) 1
(Or is it?) 1
(Theyre all in their early 20s & Kipps is mid-20s) 1
(a teensy bit) 1
(and unsure how to tag this) 1
(as expected) 1
(at least in my canon) 1
(because Lockwood needs the comfort more than Lucy does) 1
(because of course I couldnt just stop at 5) 1
(but in a lighthearted way) 1
(dont shoot the messenger) 1
(eventually) - Freeform 1
(for reproductive purposes) 1
(he’s going to Area 51 don’t tell Lucy) 1
(how was that not already a tag) 1
(if you dont count the skull) 1
(if you squint really hard) 1
(if youve ever thought that to yourself then youre in the right place) 1
(implied) - Freeform 1
(in the loosest possible definition) 1
(just a lil bit) 1
(looks at major character death tag in a hunger games au) 1
(loosely based regency im not doing enough research for accuracy) 1
(maybe? Not sure about what time period th

In [88]:
# One row per author: the latest 'updatedate' and the earliest 'published'
# date among that author's rows.
author_df = working_df.groupby('author', as_index=False).agg(
    lastauthorupdate=('updatedate', 'max'),
    firstauthorupdate=('published', 'min'),
)
author_df

Unnamed: 0,author,lastauthorupdate,firstauthorupdate
0,13atoms (2Atoms),2023-04-07,2023-02-23
1,1Lovepsych,2021-11-24,2021-08-27
2,35portlandrow,2021-12-08,2015-04-29
3,425anonymous,2021-05-29,2021-01-14
4,AJ_Bullet,2023-03-30,2023-03-16
...,...,...,...
480,writerfan2013,2023-04-08,2017-10-18
481,xluminaheart,2022-06-01,2022-02-16
482,youareiron_andyouarestrong,2023-02-25,2023-02-25
483,yuulei (ixoria),2023-03-27,2023-03-21


In [89]:
# Attach each author's first/last dates to every row of working_df.
# The columns author_df is about to contribute are dropped first. This notebook
# saves working_df back to the CSV it was loaded from, so on a later run those
# columns can already be present. merge() would then produce *_x / *_y suffixed
# duplicates instead of the plain column names the following cells read.
author_value_cols = [col for col in author_df.columns if col != 'author']
working_df = (
    working_df
    .drop(columns=author_value_cols, errors='ignore')
    .merge(author_df, how='left', on='author')
)

In [91]:
# Per-author timing features, expressed in days as floats.
one_day = pd.Timedelta(days=1)
since_last_author_update = working_df['currentdate'] - working_df['lastauthorupdate']
author_active_span = working_df['lastauthorupdate'] - working_df['firstauthorupdate']

working_df['author_lastupdate_diff'] = since_last_author_update / one_day
working_df['daysactive'] = author_active_span / one_day
# An author counts as 'active' when their latest update is at most 60 days
# before the reference date; everything else (including missing values) is 'inactive'.
working_df['author_activity'] = np.where(
    working_df['author_lastupdate_diff'] <= 60, 'active', 'inactive'
)

In [95]:
# Show the author date columns, longest active span first.
author_span_cols = ['lastauthorupdate', 'firstauthorupdate', 'daysactive']
working_df[author_span_cols].sort_values('daysactive', ascending=False)

Unnamed: 0,lastauthorupdate,firstauthorupdate,daysactive
1295,2023-02-04,2014-02-05,3286.0
1321,2023-02-04,2014-02-05,3286.0
834,2023-02-04,2014-02-05,3286.0
1319,2023-02-04,2014-02-05,3286.0
1294,2023-02-04,2014-02-05,3286.0
...,...,...,...
1156,2019-04-07,2019-04-07,0.0
684,2023-02-25,2023-02-25,0.0
682,2023-02-25,2023-02-25,0.0
672,2023-02-26,2023-02-26,0.0


In [92]:
# Write the enriched table to disk without the row index. The target is the
# same file name that was read at the top of the notebook, so the input file
# is overwritten with the added columns.
working_df.to_csv('ao3_lockwood_and_co_ao_21042023_1857.csv', index=False)