In [None]:
import pandas as pd

def mapping_input(file, SampleID, SourceSink, Env, Sink=None, Env_add=None, output_name="1"):
    """
    Build a SourceTracker-style mapping table from a larger CSV mapping file.

    The CSV is read, the chosen columns are renamed to ``SampleID``,
    ``SourceSink`` and ``Env``, every other column is discarded, and the
    result is written as a tab-separated file next to the input.

    Parameters
    ----------
    file : str
        Path to the CSV mapping file. The output path is built by removing
        the last four characters of this string (e.g. ``".csv"``) and
        appending ``output_name + ".txt"``.
    SampleID : str
        Name of the column holding the sample identifiers.
    SourceSink : str
        Name of the column to use as the ``SourceSink`` column. Its existing
        values are overwritten: every row becomes ``"source"`` unless its
        environment is listed in ``Sink``.
    Env : str
        Name of the column holding the environment label.
    Sink : list, optional
        Environment values (compared against the final ``Env`` column,
        i.e. after any ``Env_add`` concatenation) whose rows are marked
        ``"sink"``.
    Env_add : list of str, optional
        Additional column names whose values are appended, in order, to the
        ``Env`` values with ``+``.
    output_name : str, optional
        Suffix inserted before ``".txt"`` in the output file name.

    Returns
    -------
    pandas.DataFrame
        The three-column mapping table that was written to disk.
    """
    # None defaults avoid sharing one mutable list between calls.
    sink_envs = [] if Sink is None else list(Sink)
    extra_env_columns = [] if Env_add is None else list(Env_add)

    mapping = pd.read_csv(file)
    for extra_column in extra_env_columns:
        mapping[Env] = mapping[Env] + mapping[extra_column]

    mapping = mapping.rename(
        columns={SampleID: 'SampleID', SourceSink: 'SourceSink', Env: 'Env'}
    )
    mapping = mapping.reindex(columns=["SampleID", "SourceSink", "Env"])

    # Assign through the frame itself (not through a column selected from it)
    # so the write is guaranteed to land in `mapping`.
    mapping['SourceSink'] = 'source'
    mapping.loc[mapping['Env'].isin(sink_envs), 'SourceSink'] = 'sink'

    mapping.to_csv(file[:-4] + output_name + '.txt', sep='\t', index=False)
    return mapping


def otu_input(file,Transpose=False, output_name="1"):
    """
    Reformat a raw OTU CSV file into a tab-separated table.

    The CSV at ``file`` is read, its ``OTUs`` column becomes the index,
    and the table is optionally transposed. The result is written to the
    path formed by removing the last four characters of ``file`` and
    appending ``output_name + '.txt'``, then returned.
    """
    table = pd.read_csv(file, index_col=False).set_index("OTUs")
    # Only flip the table when the flag compares equal to True.
    result = table.T if Transpose == True else table
    destination = file[:-4] + output_name + '.txt'
    result.to_csv(destination, sep='\t', index=True)
    return result


def match_map_otu(mapping_file, otu_file):
    """
    Drop mapping rows whose SampleID does not appear in the OTU table.

    Both files are read as tab-separated tables. A mapping row is kept only
    when its ``SampleID`` matches one of the OTU table's column headers;
    identifiers are compared as strings so numeric-looking sample IDs still
    match their header text. The filtered mapping table is written back to
    ``mapping_file`` (tab-separated, without the index) and returned.

    Parameters
    ----------
    mapping_file : str
        Path to a tab-separated mapping file with a ``SampleID`` column.
        This file is overwritten with the filtered table.
    otu_file : str
        Path to a tab-separated OTU table.
        NOTE(review): assumes sample IDs are the column headers of this
        table (OTUs as rows) -- confirm against the files actually used.

    Returns
    -------
    pandas.DataFrame
        The mapping table restricted to samples found in the OTU table.
    """
    map_table = pd.read_table(mapping_file)
    otu_table = pd.read_table(otu_file)

    # Transposing the table and indexing it by 'OTUs' cannot work (the
    # transposed columns are integer positions); the column headers are the
    # identifiers to match against. A set gives O(1) membership tests.
    known_ids = {str(header) for header in otu_table.columns}

    present = map_table['SampleID'].astype(str).isin(known_ids)
    map_table = map_table[present]

    # Persist the cleaned table over the mapping file that was read.
    map_table.to_csv(mapping_file, sep='\t', index=False)
    return map_table


def mapping_to_mapping(map_file, dropped=None,
                       source2sink=None, sink2source=None,
                       output_name="1"):
    """
    Derive a new mapping file from an existing tab-separated one.

    Steps are applied in this order:

    1. rows whose ``Env`` is in ``source2sink`` get ``SourceSink = 'sink'``;
    2. rows whose ``Env`` is in ``sink2source`` get ``SourceSink = 'source'``
       (so an environment listed in both ends up as ``'source'``);
    3. rows whose ``Env`` is in ``dropped`` are removed.

    The result is written to ``map_file`` with its last four characters
    replaced by ``output_name + '.txt'``. If the input name (minus those four
    characters) already ends in ``"1"`` and ``output_name`` is the default
    ``"1"``, the suffix ``"2"`` is used instead.

    Parameters
    ----------
    map_file : str
        Path to a tab-separated mapping file with ``Env`` and ``SourceSink``
        columns.
    dropped : list, optional
        ``Env`` values whose rows are removed.
    source2sink : list, optional
        ``Env`` values to relabel as ``'sink'``.
    sink2source : list, optional
        ``Env`` values to relabel as ``'source'``.
    output_name : str, optional
        Suffix inserted before ``'.txt'`` in the output file name.

    Returns
    -------
    pandas.DataFrame
        The modified mapping table that was written to disk.
    """
    # None defaults avoid sharing one mutable list between calls.
    to_sink = [] if source2sink is None else list(source2sink)
    to_source = [] if sink2source is None else list(sink2source)
    to_drop = [] if dropped is None else list(dropped)

    mapping = pd.read_table(map_file)

    # Write through .loc on the frame itself so the relabelling is applied
    # to `mapping` rather than to a temporary column object.
    if to_sink:
        mapping.loc[mapping['Env'].isin(to_sink), 'SourceSink'] = 'sink'
    if to_source:
        mapping.loc[mapping['Env'].isin(to_source), 'SourceSink'] = 'source'
    if to_drop:
        mapping = mapping[~mapping['Env'].isin(to_drop)]

    # Slicing (instead of indexing position -5) keeps very short paths from
    # raising IndexError while testing the same character.
    if map_file[:-4].endswith("1") and output_name == "1":
        output_name = "2"

    mapping.to_csv(map_file[:-4] + output_name + '.txt', sep='\t', index=False)
    return mapping


def drop_items(map_file, dropped=None, output_name="1"):
    """
    Remove specific samples from a tab-separated mapping file.

    Rows whose ``SampleID`` is listed in ``dropped`` are removed; the result
    is written to ``map_file`` with its last four characters replaced by
    ``output_name + '.txt'`` and returned.

    Parameters
    ----------
    map_file : str
        Path to a tab-separated mapping file with a ``SampleID`` column.
    dropped : list, optional
        Sample identifiers to remove. ``None`` or an empty list removes
        nothing.
    output_name : str, optional
        Suffix inserted before ``'.txt'`` in the output file name.

    Returns
    -------
    pandas.DataFrame
        The mapping table without the dropped samples.
    """
    # None default avoids sharing one mutable list between calls.
    unwanted = [] if dropped is None else list(dropped)

    mapping = pd.read_table(map_file)
    # One membership filter replaces a separate drop() pass per sample.
    mapping = mapping[~mapping['SampleID'].isin(unwanted)]
    mapping.to_csv(map_file[:-4] + output_name + '.txt', sep='\t', index=False)
    return mapping

def merge_env(map_file, merge, output_name="1"):
    """
    Collapse several environment labels into one in a mapping file.

    Every row whose ``Env`` equals any entry of ``merge`` is relabelled with
    the first entry, ``merge[0]``. The result is written to ``map_file`` with
    its last four characters replaced by ``output_name + '.txt'`` and
    returned.

    Parameters
    ----------
    map_file : str
        Path to a tab-separated mapping file with an ``Env`` column.
    merge : list
        Environment labels to merge; the first element is the preferred
        name that replaces all the others. An empty list changes nothing.
    output_name : str, optional
        Suffix inserted before ``'.txt'`` in the output file name.

    Returns
    -------
    pandas.DataFrame
        The mapping table with the merged ``Env`` labels.
    """
    mapping = pd.read_table(map_file)
    labels = list(merge)
    if labels:
        # Write through .loc on the frame itself so the relabelling is
        # applied to `mapping` rather than to a temporary column object.
        mapping.loc[mapping["Env"].isin(labels), "Env"] = labels[0]
    mapping.to_csv(map_file[:-4] + output_name + '.txt', sep='\t', index=False)
    return mapping