## Import and configuration variables

In [70]:
import csv
import re
import requests
import os
from datetime import datetime

# Input CSV files, one per subject area; the paths are relative to the
# directory the notebook is run from.
# NOTE(review): presumably Retraction Watch exports, judging by the folder
# name and the note about converting the HTML table — confirm.
csv_arts = "../data/retracted_watch/arts.csv"
csv_history = "../data/retracted_watch/history.csv"
csv_religion = "../data/retracted_watch/religion.csv"
csv_architecture = "../data/retracted_watch/architecture.csv"
csv_cartography = "../data/retracted_watch/cartography.csv"
csv_journalism = "../data/retracted_watch/journalism.csv"
csv_philosophy = "../data/retracted_watch/philosophy.csv"

## Useful functions

In [23]:
def write_list(l,file_path, header= True):
    """Write a list of dicts to ``file_path`` as a CSV file.

    Every dict of ``l`` becomes one line; its values are converted with
    ``str()``, wrapped in double quotes and joined with commas. Double quotes
    inside a value are doubled (the usual CSV escaping) so the line stays
    parseable.

    Parameters
    ----------
    l : list of dict
        The records to write. All records are written, including the first.
    file_path : str
        Destination file; it is created or truncated.
    header : bool, default True
        When true, a first line holding the (unquoted) keys of ``l[0]`` is
        written before the records.

    An empty ``l`` produces an empty file, whatever the value of ``header``.
    """
    # The context manager guarantees the file is flushed and closed, even if
    # a record cannot be serialised.
    with open(file_path,"w+") as f:
        if not l:
            return

        #header
        if header:
            f.write(",".join(str(k_header) for k_header in l[0].keys())+"\n")

        #content
        # The header is derived from the keys of l[0]; l[0] itself is still a
        # data record and must be written like every other one.
        for record in l:
            cells = ['"'+str(value).replace('"','""')+'"' for value in record.values()]
            f.write(",".join(cells)+"\n")

## The main program  

The processing steps to follow are:
1. Get the COCI metadata for each retracted article in the csv
2. Write the results on a csv file 
3. Select the subset of elements from the list of all retracted items which have been found in COCI
4. Write the results of (3) on a csv file

**Note:** I used http://www.convertcsv.com/html-table-to-csv.htm to convert the HTML table into a csv file.


In [34]:
# Subject area processed by this run; it is only used to build the names of
# the output files below.
domain = "philosophy"
# Input CSV of the selected subject area (kept in sync with `domain` by hand).
csv_to_analyse = csv_philosophy
# Output of step (2): the COCI metadata collected in step (1).
coci_res_file = "../data/coci/retracted_"+domain+"_(coci_metadata).csv"
# Output of step (4): the rows of the input CSV selected and ordered in step (3).
coci_res_file_min1cit = "../data/coci/retracted_"+domain+"_(min1cit).csv"

In [35]:
#(1) Get the COCI metadata for each retracted article in the csv
# A DOI is taken to be everything from "10." up to the end of a line of the
# cell; MULTILINE lets a cell holding several lines yield several DOIs.
doi_regex = re.compile(r"(10\..*)$", re.MULTILINE)

items = []
with open(csv_to_analyse) as a_file:
    for row in csv.reader(a_file, delimiter=','):
        # The DOIs are read from the fifth column; rows that do not have one
        # (e.g. blank or truncated lines) are skipped instead of crashing.
        if len(row) <= 4:
            continue

        for match in doi_regex.finditer(row[4]):
            # Drop trailing blanks / carriage returns captured by ".*".
            doi = match.group().strip()
            # A timeout keeps one stalled connection from hanging the whole run.
            r = requests.get('https://opencitations.net/index/coci/api/v1/metadata/'+doi, timeout=60)
            # Stop with the HTTP status rather than with an opaque JSON error.
            r.raise_for_status()
            records = r.json()
            # An empty answer is skipped (presumably the DOI is not in COCI).
            if len(records) > 0:
                items.append(records[0])

In [36]:
#(2) Write the results on a csv file
# The column names of the header line are taken from the keys of the first
# COCI record in `items`.
write_list(items,coci_res_file)

In [37]:
# (3) Select the subset of elements from the list of all retracted items which have been found in COCI

# DOIs (lower case) with at least one citation in COCI for the domain being
# processed. The list is maintained by hand and must be replaced when
# `domain` changes; the one set here is the philosophy list.
dois_min1cit = ["10.1200/jco.2013.49.1845","10.1177/0146167207312465","10.1016/j.shpsa.2011.02.001","10.1007/1-4020-3001-0_4","10.4103/0019-5545.105519","10.1007/s11245-007-9015-y","10.1007/s11059-008-4013-7","10.1111/j.0066-7372.2003.00023.x","10.1111/j.1477-4658.2009.00603.x","10.1515/9783110204544.6.277","10.2143/etl.75.4.504765","10.2143/rtpm.67.1.520","10.1017/ccol0521822424.012","10.1017/ccol0521650186.018"]

# A DOI is everything from "10." to the end of a line of the cell.
doi_regex = re.compile(r"(10\..*)$", re.MULTILINE)
# Unique DOIs in their original order: a DOI listed twice must not emit the
# same rows twice when ordering.
ordered_dois = list(dict.fromkeys(d.lower() for d in dois_min1cit))
wanted_dois = set(ordered_dois)

subset_items = []
with open(csv_to_analyse, 'r') as csvFile:
    for row in csv.reader(csvFile):
        # The DOIs are read from the fifth column; skip rows without one.
        if len(row) <= 4:
            continue
        row_dois = {m.group().strip().lower() for m in doi_regex.finditer(row[4])}
        # Keep the row once, even when several of its DOI lines are wanted.
        if row_dois & wanted_dois:
            # Rows are stored as dicts keyed by column position.
            subset_items.append(dict(enumerate(row)))

#order them according to the results
sorted_list = []
already_emitted = set()
for doi in ordered_dois:
    for position, item in enumerate(subset_items):
        if position not in already_emitted and doi in item[4].lower():
            already_emitted.add(position)
            sorted_list.append(item)

In [38]:
#(4) Write the results of (3) on a csv file
# header=False: the rows are dicts keyed by column position, so there are no
# column names to write.
write_list(sorted_list,coci_res_file_min1cit, False)

### COCI results: 
* **Arts DOIs with at least 1 citation:** ```dois_min1cit = ["10.1016/j.jesp.2011.02.006","10.1111/j.1365-2702.2010.03368.x","10.1109/esiat.2010.5568967","10.1109/iccsit.2010.5564505","10.1007/s11059-013-0186-9","10.1353/cdr.2011.0017","10.1109/iccsit.2010.5564489","10.1109/edt.2010.5496628"] ```
* **Religion DOIs with at least 1 citation:** ```["10.1016/j.leaqua.2011.02.002","10.1016/j.leaqua.2011.02.002","10.1016/j.leaqua.2011.02.002","10.1016/j.cub.2015.09.056","10.1080/014198700329114","10.4103/0019-5545.105526","10.1111/j.1365-2702.2008.02690.x","10.1371/journal.pone.0180487","10.1093/jis/etp023","10.1111/j.1466-7657.2011.00900.x","10.4103/0019-5545.105519","10.2143/etl.75.4.504765","10.2143/rtpm.67.1.520","10.1017/ccol0521822424.012","10.1017/ccol0521650186.018"]```
* **Architecture DOIs with at least 1 citation:** ```["10.1155/2015/954651","10.1016/j.sysarc.2010.08.005","10.1016/j.sbspro.2011.05.014","10.1109/opee.2010.5507971"] ```
* **Cartography DOIs with at least 1 citation:** ```["10.4113/jom.2010.1074","10.4236/ars.2016.51001","10.1186/s40201-014-0153-z","10.4113/jom.2010.1128","10.4113/jom.2009.1045"] ```
* **Philosophy DOIs with at least 1 citation:**
 ```["10.1200/jco.2013.49.1845","10.1177/0146167207312465","10.1016/j.shpsa.2011.02.001","10.1007/1-4020-3001-0_4","10.4103/0019-5545.105519","10.1007/s11245-007-9015-y","10.1007/s11059-008-4013-7","10.1111/j.0066-7372.2003.00023.x","10.1111/j.1477-4658.2009.00603.x","10.1515/9783110204544.6.277","10.2143/etl.75.4.504765","10.2143/rtpm.67.1.520","10.1017/ccol0521822424.012","10.1017/ccol0521650186.018"] ```

## Citations: after and before Retraction
Taking the previous results, the main objective is to classify the citations into 3 different timeline groups, according to the retraction date: (1) Before (2) Same date (3) After. We also consider the "Same date" category, since some of the retracted documents and/or their citations do not have a complete date, e.g. retracted article date = 2013, and a citation in the same year.

### Example from the retracted articles in Philosophy
**Title:** "Cancer Drugs In The United States: Justum Pretium—The Just Price"  
**DOI:** 10.1200/jco.2013.49.1845  
**Citations number:** 103  
**Retraction date:** 10/01/2015 (%m/%d/%Y)   
**Retraction reasons:** +Criminal Proceedings+Falsification/Fabrication of Data+Investigation by Company/Institution+Investigation by Third Party+Misconduct - Official Investigation/Finding

In [76]:
# Maps the `citing` identifier of every citation to its creation date.
all_cits = {}

# Example article. All of its citations are downloaded; no date filter is
# applied by the request that is actually sent, and `retraction_date` is only
# referenced by the commented-out filtered variant below.
doi = "10.1200/jco.2013.49.1845"
retraction_date = "2015-10-01"

#with filter
# NOTE: `cat` and `cat_k` are not defined in the cells shown here.
#r = requests.get('https://opencitations.net/index/coci/api/v1/citations/'+str(doi)+"?filter=creation:"+str(cat[cat_k]["operation"])+str(retraction_date))
#without filter
# A timeout keeps a stalled connection from hanging the notebook.
r = requests.get('https://opencitations.net/index/coci/api/v1/citations/'+str(doi), timeout=60)
# Stop with the HTTP status rather than with an opaque JSON error.
r.raise_for_status()
for elem in r.json():
    creation = elem.get("creation")
    # A citation without a creation date cannot be placed on the timeline.
    if not creation:
        continue

    # The date precision is inferred from the length of the string:
    # "YYYY", "YYYY-MM" or "YYYY-MM-DD".
    if len(creation) < 7:
        pattern = '%Y'
    elif len(creation) < 10:
        pattern = '%Y-%m'
    else:
        pattern = '%Y-%m-%d'

    all_cits[elem["citing"]] = datetime.strptime(creation,pattern)

In [84]:
# Rebuild the dict with its entries ordered by value, i.e. by creation date
# (oldest first); the result is still a dict, not a list of tuples.
all_cits = {
    citing: created
    for citing, created in sorted(all_cits.items(), key=lambda entry: entry[1])
}

In [105]:
# One point per citation: x is the year of the citation, y the `citing` key.
xy_points = [{"x": created.year, "y": citing} for citing, created in all_cits.items()]

# Number of citations per "year-month" label, in order of first appearance.
# The month is not zero-padded, and a date parsed from a year-only string
# reports month 1.
index = {}
for created in all_cits.values():
    index_k = str(created.year)+"-"+str(created.month)
    index[index_k] = index.get(index_k, 0) + 1

# One point per label: x is the "year-month" label, y the number of citations.
xy_points_date_groups = [{"x": label, "y": total} for label, total in index.items()]

In [106]:
# Per-citation points (x = citation year, y = the `citing` key).
write_list(xy_points, "../data/coci/coci_citations_ex1doi.csv")
# Citation counts per "year-month" label (x = label, y = number of citations).
write_list(xy_points_date_groups, "../data/coci/coci_citations_ex1doi_temp.csv")