# Download files and all metadata for records in a group

This uses an endpoint for private files. It saves the files for one or more records and it creates JSON and CSV versions of both the record metadata and file metadata.

Instructions:
1. Save a CSV called item-ids.csv in the same folder as this notebook. Include a header row called item_id
2. Add the list of item ids (even if it's only one) to the first column of that CSV. The item id is the number at the end of the URL for a record. You'll also see this referred to as 'article id'.
3. Create a token for your account (https://help.figshare.com/article/how-to-get-a-personal-token)
4. Paste the token into a text document called my-token.txt and put that in the same folder as this notebook

Caveat: This is not an official Figshare script - it is an example to demonstrate API functionality.

In [None]:
import json
import requests
import pandas as pd
import csv
from pathlib import Path

In [None]:
#Set the token in the header and base URL

#Read the personal token from my-token.txt. The file is closed automatically
#when the with-block ends.
with open("my-token.txt", "r") as text_file:
    #strip() returns a new string, so its result must be kept; otherwise the
    #trailing newline from the text file ends up inside the Authorization header
    TOKEN = text_file.read().strip()


api_call_headers = {'Authorization': 'token ' + TOKEN}

#Set the base URL
BASE_URL = 'https://api.figshare.com/v2'

In [None]:
#Open the CSV of item ids (the first row is the header)
df_ids = pd.read_csv('item-ids.csv',encoding='utf-8')
#Make a list from the first column only. Selecting the column directly keeps
#the ids in their own dtype; converting the whole table with .values would
#turn integer ids into floats (123 -> 123.0) if any other column holds floats
ids = df_ids.iloc[:, 0].tolist()
print(len(ids))

## Download all the metadata from your group
This creates JSON and CSV files of the full metadata record for each item, plus JSON and CSV files with just the file info: article id, file id, file name, and computed md5. (The md5 was computed by Figshare during upload, so you can check your downloaded files against it later if you like.)

In [None]:
#This function deals with nested json values
def exists(obj, chain):
    """Follow the keys in chain through nested obj and return the value found.

    obj is the outermost container (for example a dict parsed from JSON) and
    chain is a list of keys to look up one after another. The list passed as
    chain is only read, never modified, so the caller can reuse it.

    Returns the value at the end of the chain, or None as soon as one of the
    keys is missing. An empty chain returns obj itself.
    """
    current = obj
    for _key in chain:
        if _key not in current:
            return None
        current = current[_key]
    return current

#Create empty lists to hold the full metadata records and the file info
full_metadata = []
file_info = []

#Make the API call to get metadata for each id
for i in ids:
    response = requests.get(BASE_URL + '/account/articles/' + str(i), headers=api_call_headers)
    #Stop with a clear HTTP error (bad token, unknown id, ...) rather than
    #failing later with a KeyError on the error message the API sent back
    response.raise_for_status()
    record = response.json()

    #Collect the info for each file on the record; a record without a
    #'files' entry simply contributes no file rows
    for j in record.get('files', []):
        entry = {"article_id":i, "file_id":j['id'], "name":j['name'], "computed_md5":j['computed_md5']}
        file_info.append(entry)

    #add the full metadata to a growing list
    full_metadata.append(record)

#Save the JSON
with open('full-metadata.json', 'w') as f:
    json.dump(full_metadata, f)

with open('file-info.json', 'w') as f:
    json.dump(file_info, f)

#Create a dataframe from the JSON formatted full metadata
df_full = pd.DataFrame(full_metadata)
#Save a CSV file of all the metadata
df_full.to_csv('full-metadata.csv',encoding='utf-8')

#Create a dataframe from the JSON formatted file metadata
df_files = pd.DataFrame(file_info)
#Save a CSV file of the file metadata
df_files.to_csv('file-metadata.csv',encoding='utf-8')

print('Metadata files created.', len(full_metadata),'full records saved and',len(file_info), 'file records saved.')


## Download files to one folder

In [None]:
#Download each file to a 'files' subfolder and save with the file name
#Note: files from different records that share a name are written to the
#same path, so the later download replaces the earlier one

#The folder is the same for every file, so create it once up front
download_dir = Path('files')
download_dir.mkdir(exist_ok=True)

for k in file_info:
    file_url = BASE_URL + '/file/download/' + str(k['file_id'])
    #stream=True fetches the body in pieces instead of holding the whole file in memory
    with requests.get(file_url, headers=api_call_headers, stream=True) as response:
        #Raise on a failed download instead of saving the error response as if it were the file
        response.raise_for_status()
        #The with-block closes the file even if the download is interrupted
        with open(download_dir / k['name'], 'wb') as out_file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                out_file.write(chunk)

print('All done.')

## OR download the files to folders labeled by article id

In [None]:
#Download each file to a subfolder named for the article id and save with the file name

for k in file_info:
    file_url = BASE_URL + '/file/download/' + str(k['file_id'])
    #stream=True fetches the body in pieces instead of holding the whole file in memory
    with requests.get(file_url, headers=api_call_headers, stream=True) as response:
        #Raise on a failed download instead of saving the error response as if it were the file
        response.raise_for_status()
        #One folder per record, created the first time a file for that record is saved
        article_dir = Path(str(k['article_id']))
        article_dir.mkdir(exist_ok=True)
        #The with-block closes the file even if the download is interrupted
        with open(article_dir / k['name'], 'wb') as out_file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                out_file.write(chunk)

print('All done.')