In [1]:
!pip install couchdb
!pip install pandas

Collecting couchdb
[?25l  Downloading https://files.pythonhosted.org/packages/ff/35/6660f7526c5d509b13264b27642de73754bd3d0addf56b175601c8b951e1/CouchDB-1.2-py2.py3-none-any.whl (67kB)
[K     |████████████████████████████████| 71kB 1.8MB/s eta 0:00:01
[?25hInstalling collected packages: couchdb
Successfully installed couchdb-1.2


In [2]:
import couchdb
import time
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [12]:
# Read Access Credentials
# Only the first line of passwords.csv is used; its first two comma-separated
# fields are taken as the user name and the password.
with open("passwords.csv") as myfile:
    head = [myfile.readline()]
# Drop the line terminator before splitting: otherwise, when the password is
# the last field on the line, it would carry a trailing "\n" into the URL.
info = head[0].rstrip('\r\n').split(',')
if len(info) < 2:
    raise ValueError("passwords.csv: first line must contain at least 'name,password'")
name = info[0]
pw = info[1]

In [13]:
# Connect to Server
# The credentials read earlier are embedded directly in the server URL.
# NOTE(review): name/pw are inserted unescaped; characters such as '@', ':' or
# '/' in either value would change how the URL is parsed -- confirm they are URL-safe.
secure_remote_server = couchdb.Server(
    'https://{}:{}@couchdb3.prtd.app/'.format(name, pw)
)
# Handle on the 'anc5' database used by every cell below.
db = secure_remote_server['anc5']

In [14]:
# Extract data (will take 5-10 minutes, recommend putting these lines in a separate cell)
# Query the `_all_docs` view with include_docs=True so each row carries its document.
rows = db.view('_all_docs', include_docs=True) # ,limit=10
# Keep only the document of each view row.
data = list(map(lambda entry: entry['doc'], rows))
# One DataFrame row per document; columns are the union of all document keys.
df = pd.DataFrame(data)

In [20]:
# Separate out data types
def _docs_of_type(frame, doc_type):
    """Return the rows of ``frame`` whose ``_id`` starts with ``"<doc_type>:"``.

    Document ids in this notebook are of the form ``"<type>:<number>"``
    (e.g. ``sample:0000010608``). Matching on the prefix, rather than on a
    substring anywhere in the id, keeps ids that merely contain the word
    (for instance a design document) out of the result.

    The result is re-indexed from 0; the original row position is kept in an
    ``index`` column, which later cells drop explicitly.
    """
    # na=False: rows without a string _id never match.
    has_prefix = frame['_id'].str.startswith(doc_type + ':', na=False)
    return frame[has_prefix].reset_index()

analysis = _docs_of_type(df, 'analysis')
book = _docs_of_type(df, 'book')
sample = _docs_of_type(df, 'sample')
institution = _docs_of_type(df, 'institution')
person = _docs_of_type(df, 'person')
paper = _docs_of_type(df, 'paper')
catalog = _docs_of_type(df, 'catalog')

# Data: Sample

def checkSitu(x,key):
    """Return ``x[key]``, or NaN when the lookup is not possible.

    Parameters
    ----------
    x : object
        Value to index; in this notebook, one cell of the ``inSitu`` column
        (a dict for populated rows, otherwise a missing value).
    key : hashable
        Key to look up in ``x``.

    Returns
    -------
    object
        ``x[key]`` when the lookup succeeds, otherwise ``np.nan``.
    """
    try:
        return x[key]
    # Only lookup failures are treated as "missing": KeyError/IndexError for an
    # absent key, TypeError when x is not subscriptable (e.g. NaN or None).
    # Anything else, including KeyboardInterrupt, propagates.
    except (KeyError, IndexError, TypeError):
        # np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
        return np.nan
    
# Initial Setup
# Drop columns that are empty for every sample document, drop the leftover
# positional 'index' column, strip the "sample:" / "book:" id prefixes and
# relabel the id columns. Built as one chain so `sample` itself is never
# modified and re-running the cell gives the same result.
sample1 = (
    sample.dropna(axis=1, how='all')
    .drop('index', axis=1)
    .assign(
        # regex=False: the prefixes are literal text, not patterns.
        _id=lambda frame: frame['_id'].str.replace('sample:', '', regex=False),
        objectId=lambda frame: frame['objectId'].str.replace('book:', '', regex=False),
    )
    # Relabel columns
    .rename(columns={"_id": "sample_id", "objectId": "book_id"})
)

#Separate out columns
# .copy() makes sample2 an independent frame, so the column assignments below
# write to sample2 itself instead of to a slice of sample1.
sample2 = sample1[['sample_id','book_id', 'barcode', 'procedure', 'notes', 'inSitu', 'pageSampled']].copy()

# Extract inSitu information
# Each new column takes one key out of the per-row inSitu value; rows where
# the key cannot be looked up get NaN (see checkSitu).
for new_column, situ_key in (
    ('status', 'isInSitu'),
    ('type', 'type'),
    ('location', 'location'),
    ('set', 'set'),
):
    sample2[new_column] = sample2['inSitu'].apply(checkSitu, args=(situ_key,))

sample2

Unnamed: 0,sample_id,book_id,barcode,procedure,notes,inSitu,pageSampled,status,type,location,set
0,0000010000,00100,ANC001000,SOP,,{'isInSitu': False},289,False,,,
1,0000010001,00100,ANC001012,SOP,,{'isInSitu': False},291,False,,,
2,0000010002,00101,ANC001001,SOP,,{'isInSitu': False},79,False,,,
3,0000010003,00101,ANC001013,SOP,,{'isInSitu': False},81,False,,,
4,0000010004,00102,ANC001002,SOP,,{'isInSitu': False},203,False,,,
...,...,...,...,...,...,...,...,...,...,...,...
6119,0000016329,00129,,SOP,,"{'isInSitu': True, 'type': 'fors', 'location':...",151,True,fors,gutter,
6120,0000016330,00129,,SOP,,"{'isInSitu': True, 'type': 'ftir', 'location':...",151,True,ftir,edge,
6121,0000016331,00129,,SOP,,"{'isInSitu': True, 'type': 'ftir', 'location':...",151,True,ftir,inset,
6122,0000016332,10954,ANC002201,SOP,"plate, primary textblock stock","{'isInSitu': False, 'type': '', 'location': ''...",plate xxx,False,,,


In [21]:
# Display the analysis documents selected above (large frames render truncated).
analysis

Unnamed: 0,index,_id,_rev,access,validate_doc_update,views,language,objectId,isInSitu,sampleId,...,acquiredOn,acquiredOnIsApprox,agingTemperature,agingNatural,username,inSitu,pageSampled,sampledOn,sampledByPersonId,sampedByPersonId
0,3,analysis:0000000000,49-cedb344b53eb03cb3fc98e6cf7c17fd3,,,,,book:00112,True,sample:0000010608,...,,,,,,,,,,
1,4,analysis:0000000001,34-abd09afe1220c9836830f2c23f8d4949,,,,,book:00112,True,sample:0000010609,...,,,,,,,,,,
2,5,analysis:0000000002,18-8fa499a28006ae621cb6aa912948a83b,,,,,book:00112,True,sample:0000010610,...,,,,,,,,,,
3,6,analysis:0000000003,5-eee992d6ef6ead615c4c578e7976416f,,,,,book:00112,True,sample:0000010611,...,,,,,,,,,,
4,7,analysis:0000000004,5-7f39755f31c340d22da6e9c4771c3e84,,,,,book:00112,True,sample:0000010612,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7849,7852,analysis:0000008669,3-9376524c77afd83a06361b751aea6ab2,,,,,book:00129,True,sample:0000016327,...,,,,,,,,,,
7850,7853,analysis:0000008670,3-2939a6810b59b42a53aa625b6b0cca48,,,,,book:00129,True,sample:0000016328,...,,,,,,,,,,
7851,7854,analysis:0000008671,3-beef0b71341084fd3378ede00d5aeea8,,,,,book:00129,True,sample:0000016329,...,,,,,,,,,,
7852,7855,analysis:0000008672,2-5a1449a0955d8199b9d6c1a885bad07f,,,,,book:00129,True,sample:0000016330,...,,,,,,,,,,


In [23]:
# List the column labels of the analysis frame.
analysis.columns

Index(['index', '_id', '_rev', 'access', 'validate_doc_update', 'views',
       'language', 'objectId', 'isInSitu', 'sampleId', 'barcode', 'type',
       'procedure', 'analysedOn', 'analysedByPersonId', 'lastModifiedOn',
       'lastModifiedByPersonId', 'notes', 'data', 'deltaE', 'location',
       'stats', 'catalogId', 'institutionId', 'assessedByPersonId',
       'receivedOn', 'returnedOn', 'catalog', 'description', 'batch',
       'createdOn', 'createdByPersonId', 'assessedOn', 'doubleFold',
       'batchFromPartner', 'condition', 'title', 'oclcNum', 'callNum',
       'dateIsApprox', 'pagesToSample', 'author', 'published', 'isbn', 'issn',
       'institutions', 'crossRefFullDataSheet', 'hideDuplicate', 'name',
       'source', 'sourceBatch', 'composition', 'sizing', 'buffer',
       'acquiredOn', 'acquiredOnIsApprox', 'agingTemperature', 'agingNatural',
       'username', 'inSitu', 'pageSampled', 'sampledOn', 'sampledByPersonId',
       'sampedByPersonId'],
      dtype='object')

In [28]:
# Key the analysis rows by their document `_id`; set_index returns a new
# frame, so `analysis` keeps its positional index.
analysis1 = analysis.set_index("_id")

In [29]:
# Display the `_id`-indexed analysis frame.
analysis1

Unnamed: 0_level_0,index,_rev,access,validate_doc_update,views,language,objectId,isInSitu,sampleId,barcode,...,acquiredOn,acquiredOnIsApprox,agingTemperature,agingNatural,username,inSitu,pageSampled,sampledOn,sampledByPersonId,sampedByPersonId
_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
analysis:0000000000,3,49-cedb344b53eb03cb3fc98e6cf7c17fd3,,,,,book:00112,True,sample:0000010608,,...,,,,,,,,,,
analysis:0000000001,4,34-abd09afe1220c9836830f2c23f8d4949,,,,,book:00112,True,sample:0000010609,,...,,,,,,,,,,
analysis:0000000002,5,18-8fa499a28006ae621cb6aa912948a83b,,,,,book:00112,True,sample:0000010610,,...,,,,,,,,,,
analysis:0000000003,6,5-eee992d6ef6ead615c4c578e7976416f,,,,,book:00112,True,sample:0000010611,,...,,,,,,,,,,
analysis:0000000004,7,5-7f39755f31c340d22da6e9c4771c3e84,,,,,book:00112,True,sample:0000010612,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
analysis:0000008669,7852,3-9376524c77afd83a06361b751aea6ab2,,,,,book:00129,True,sample:0000016327,,...,,,,,,,,,,
analysis:0000008670,7853,3-2939a6810b59b42a53aa625b6b0cca48,,,,,book:00129,True,sample:0000016328,,...,,,,,,,,,,
analysis:0000008671,7854,3-beef0b71341084fd3378ede00d5aeea8,,,,,book:00129,True,sample:0000016329,,...,,,,,,,,,,
analysis:0000008672,7855,2-5a1449a0955d8199b9d6c1a885bad07f,,,,,book:00129,True,sample:0000016330,,...,,,,,,,,,,


In [30]:
# Look at the `data` column of the analysis frame, labelled by document `_id`.
analysis1["data"]

_id
analysis:0000000000    [{'type': 'raw', 'src': {'header': 'Wavelength...
analysis:0000000001    [{'type': 'raw', 'src': {'header': 'Wavelength...
analysis:0000000002    [{'type': 'raw', 'src': {'header': 'Wavelength...
analysis:0000000003    [{'type': 'raw', 'src': {'header': '', 'create...
analysis:0000000004    [{'type': 'raw', 'src': {'header': '', 'create...
                                             ...                        
analysis:0000008669    [{'type': 'raw', 'src': {'header': 'Wavelength...
analysis:0000008670    [{'type': 'raw', 'src': {'header': 'Wavelength...
analysis:0000008671    [{'type': 'raw', 'src': {'header': 'Wavelength...
analysis:0000008672    [{'type': 'raw', 'src': {'header': '', 'create...
analysis:0000008673    [{'type': 'raw', 'src': {'header': '', 'create...
Name: data, Length: 7854, dtype: object