# Project nr 1.2 Transform & Load
Artjom, Heidi, Kaja, Rasmus

### Installations

In [0]:
%sh pip install rake-nltk python-Levenshtein pycountry fuzzywuzzy

Collecting rake-nltk
  Downloading rake_nltk-1.0.6-py3-none-any.whl (9.1 kB)
Collecting python-Levenshtein
  Downloading python-Levenshtein-0.12.2.tar.gz (50 kB)
Collecting pycountry
  Downloading pycountry-22.3.5.tar.gz (10.1 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
    Preparing wheel metadata: started
    Preparing wheel metadata: finished with status 'done'
Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Collecting nltk<4.0.0,>=3.6.2
  Downloading nltk-3.7-py3-none-any.whl (1.5 MB)
Collecting tqdm
  Downloading tqdm-4.64.0-py2.py3-none-any.whl (78 kB)
Collecting click
  Downloading click-8.1.3-py3-none-any.whl (96 kB)
Collecting regex>=2021.8.3
  Downloading regex-2022.4.24-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (764 kB)
Building wheels for collected pac

### Imports

In [0]:
import time
import pyspark.sql.functions as F
from pyspark.sql import Window
from delta.tables import *
from pyspark.sql.types import *

import pycountry
import pandas as pd
import numpy as np
from Levenshtein import ratio
from fuzzywuzzy import process
import nltk
from rake_nltk import Rake
import requests

nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
Out[2]: True

### Import additional data

In [0]:
# Read the disciplines dictionary; schema inference is switched off, so the columns are not type-inferred.
disciplines = (
    spark.read.format("csv")
    .option("inferSchema", "false")
    .option("header", "true")
    .option("sep", ",")
    .load("/FileStore/tables/disciplines.csv")
)

disciplines_df = disciplines.toPandas()

# Drop Level1/Level3, de-duplicate what is left and expose the fresh 0..n-1 index as the ID column.
disc_table = disciplines_df.drop(columns=["Level1", "Level3"])
disc_table = disc_table.drop_duplicates().reset_index(drop=True).reset_index()
disc_table.columns = ["FieldOfStudyID", "FieldOfStudy"]

fieldOfStudies = spark.createDataFrame(disc_table)

# Store the lookup as the Delta table "sourceFieldOfStudies", replacing any earlier version.
(
    fieldOfStudies
    .select("FieldOfStudyID", "FieldOfStudy")
    .dropDuplicates()
    .write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("sourceFieldOfStudies")
)

## T1. Define transformation steps

### Step 0. Read json-file into Spark Dataframe

In [0]:
# Explicit schema for the publication JSON records; every field is declared nullable.
def _string_field(name):
    """Nullable string column."""
    return StructField(name, StringType(), True)


def _string_array_field(name):
    """Nullable array-of-strings column (elements may be null as well)."""
    return StructField(name, ArrayType(StringType(), True), True)


def _long_field(name):
    """Nullable LongType column."""
    return StructField(name, LongType(), True)


# One element of the "authors" array.
_author_struct = StructType(
    [_string_field(n) for n in ("_id", "bio", "email", "gid", "name", "name_zh", "oid",
                                "oid_zh", "orcid", "org", "org_zh", "orgid")]
    + [_string_array_field("orgs"), _string_field("sid")]
)

# The nested "venue" object.
_venue_struct = StructType(
    [_string_field(n) for n in ("_id", "issn", "name", "name_d", "name_s", "online_issn",
                                "publisher", "raw", "raw_zh", "sid", "src", "t")]
    + [_long_field("type")]
)

df_schema = StructType([
    _string_field("_id"),
    _string_field("abstract"),
    StructField("authors", ArrayType(_author_struct, True), True),
    _string_field("doi"),
    _string_array_field("fos"),
    _string_field("isbn"),
    _string_field("issn"),
    _string_field("issue"),
    _string_array_field("keywords"),
    _string_field("lang"),
    _long_field("n_citation"),
    _string_field("page_end"),
    _string_field("page_start"),
    _string_field("pdf"),
    _string_array_field("references"),
    _string_field("title"),
    _string_array_field("url"),
    StructField("venue", _venue_struct, True),
    _string_field("volume"),
    _long_field("year"),
])

In [0]:
def createDF(filename,df_schema):
    """Read a JSON file into a Spark DataFrame using the given schema.

    The reader has the "multiline" option switched on.
    Example filename: "/tmp/data/dblpv13_1.json".
    """
    reader = spark.read.schema(df_schema).option("multiline",True)
    return reader.json(filename)

### Step 1. Select data

In [0]:
def selection1(data):
    """Project the raw records onto the columns used by the later steps.

    Most columns are simply renamed; n_citation is additionally cast to
    integer, and the nested "authors" array is passed through unchanged.
    """
    def renamed(source, target):
        return F.col(source).alias(target)

    columns = [
        renamed("_id", "PublicationID"),
        renamed("title", "Title"),
        renamed("year", "Year"),
        F.col("n_citation").cast("integer").alias("NbrOfCitations"),
        renamed("doi", "DOI"),
        F.col("authors"),
        renamed("venue._id", "VenueID"),
        renamed("venue.name_d", "VenueName"),
        renamed("venue.raw", "VenueAbreviation"),
        renamed("lang", "Language"),
        renamed("keywords", "Keywords"),
        renamed("fos", "FieldOfStudies"),
        renamed("references", "References"),
    ]
    return data.select(*columns)

### Step 2. Drop high level duplicates

In [0]:
  def drop1(data):
    """Return the data with rows that are identical in every column removed."""
    return data.dropDuplicates()

### Step 3. First filter

In [0]:
def filter1(data):
    """Keep only the publications that pass every basic quality check.

    The checks are applied one after another, in the order listed below.
    """
    title = F.col("Title")
    doi = F.col("DOI")
    checks = [
        F.col("PublicationID").isNotNull(),
        title.isNotNull(),
        F.col("NbrOfCitations").isNotNull(),
        doi.isNotNull(),
        doi != "",
        F.size(F.col("authors.name")) > 0,   # the array of author names must have at least one element
        F.length(title) > 20,                # title must be longer than 20 symbols
        F.size(F.split(title, " ")) > 1,     # title must be longer than 1 word
    ]
    filtered = data
    for check in checks:
        filtered = filtered.filter(check)
    return filtered

### Step 4. Augmenter: get publication type

In [0]:
def getType(doi, timeout=10):
    """Look up the publication type of a DOI in the Crossref REST API.

    Args:
        doi: DOI of the publication; it is converted with str() and URL-quoted.
        timeout: seconds to wait for Crossref before giving up, so that a
            stalled connection cannot block the calling task forever.

    Returns:
        The value of message.type from the JSON response, or None when the
        DOI is missing/empty, the request fails, the response is not JSON, or
        the response carries no type.
    """
    from urllib.parse import quote

    if doi is None or str(doi).strip() == "":
        return None
    # quote() keeps "/" but escapes characters such as spaces, "#" or "?" that would corrupt the URL
    url = 'https://api.crossref.org/works/' + quote(str(doi).strip()) + '?mailto=martinsaari@me.com' # polite
    try:
        response = requests.get(url=url, timeout=timeout)
        data = response.json()
    except Exception:
        # best effort: any lookup failure means "type unknown" (but Ctrl-C / SystemExit still propagate)
        return None
    message = data.get('message') if isinstance(data, dict) else None
    if isinstance(message, dict):
        return message.get('type')
    return None

# Wrap getType as a Spark UDF. F.udf is used instead of a bare `udf`, a name this notebook never
# imports by name; StringType is spelled out and is also the default return type of udf.
getTypeUDF = F.udf(getType, StringType())

In [0]:
def augmenter1(data):
    """Add the "Type" column (looked up from the DOI) and drop rows where no type was found."""
    with_type = data.withColumn("Type",getTypeUDF(F.col("DOI")))
    return with_type.filter(F.col("Type").isNotNull())

### Step 5. Second filter: Find accurate FOS from Disciplines dictionary

In [0]:
# Candidate discipline names for the fuzzy matching in singleFos; both lists follow the row order of disciplines_df.
l2 = list(disciplines_df["Level2"])
l3 = list(disciplines_df["Level3"].fillna('nothing'))  # missing Level3 values become the placeholder 'nothing'

def singleFos(words):
    """Map a publication's field-of-study terms to a single FieldOfStudyID.

    Each term is compared case-insensitively (Levenshtein ratio) with every
    Level2 and Level3 discipline name; the best-scoring pair over all terms
    wins. A Level3 hit is mapped back to the Level2 name of the same row.

    Args:
        words: iterable of field-of-study strings.

    Returns:
        The FieldOfStudyID as a string, or None if anything goes wrong
        (for example when words is None).
    """
    # NOTE(review): row 600 is what gets used when no term scores above 0 (e.g. an empty list) - confirm intended
    best, rat0 = 600, 0
    try:
        # loop-invariant: lowercase the candidate names once per call instead of once per term
        candidates = [l.lower() for l in l2 + l3]
        for word in words:
            ratios = [ratio(word.lower(), candidate) for candidate in candidates]
            top = int(np.argmax(ratios))
            if ratios[top] > rat0:
                # candidates is l2 followed by l3, so the modulo folds a Level3 index back onto its row
                rat0, best = ratios[top], top % len(l2)
        ind = disc_table['FieldOfStudy']==l2[best]
        # returned as a string; the numpy integer held in the table is not returned directly
        return str(disc_table[ind]['FieldOfStudyID'].values[0])
    except Exception:
        return None

# Wrap singleFos as a Spark UDF. F.udf is used instead of a bare `udf`, a name this notebook never
# imports by name; StringType is spelled out and is also the default return type of udf.
getSingleFosUDF = F.udf(singleFos, StringType())

In [0]:
def filter2(data):
    """Add the "FieldOfStudyID" column derived from "FieldOfStudies"; no rows are removed here."""
    fos_id = getSingleFosUDF(F.col("FieldOfStudies"))
    return data.withColumn("FieldOfStudyID",fos_id)

### Step 6. Third filter: extract the main keyword

In [0]:
def getKeyword(keylist):
    """Pick the top-ranked RAKE phrase from a list of keyword strings.

    Args:
        keylist: list of keyword strings (may be None or empty).

    Returns:
        The highest-ranked phrase, or None when there is no input, RAKE finds
        no phrase, or the extraction fails.
    """
    if not keylist:
        return None
    r = Rake()
    try:
        r.extract_keywords_from_sentences(keylist)
        ranked = r.get_ranked_phrases_with_scores()  # (score, phrase) pairs, best first
        return ranked[0][1] if ranked else None
    except Exception:
        # best effort: a record whose keywords cannot be processed simply gets no keyword
        return None

# Wrap getKeyword as a Spark UDF. F.udf is used instead of a bare `udf`, a name this notebook never
# imports by name; StringType is spelled out and is also the default return type of udf.
getKeywordUDF = F.udf(getKeyword, StringType())

In [0]:
def filter3(data):
    """Add the "Keyword" column derived from "Keywords"; no rows are removed here."""
    main_keyword = getKeywordUDF(F.col("Keywords"))
    return data.withColumn("Keyword",main_keyword)

### Step 7. Explode Author data

In [0]:
def explosion(data):
    """Turn the nested "authors" array into one row per (publication, author).

    Adds AuthorID, OrganizationID, Organization, AuthorRank, FirstName,
    MiddleName and LastName; drops the "authors" array and the intermediate
    AuthorName column. Rows without an AuthorID or author name are removed.
    """
    name_parts = F.split(F.col('AuthorName')," ")
    exploded = (data
                  .select("*", F.posexplode_outer("authors")) #posexplode creates two columns: pos (position) and col (value at position)
                  .drop("authors")
                  .select(
                      "*",
                      F.trim("col._id").alias("AuthorID"),
                      F.trim("col.name").alias("AuthorName"),
                      F.trim("col.orgid").alias("OrganizationID"),
                      F.trim("col.org").alias("Organization"),
                  )
                  .drop("col")
                  .withColumn("AuthorRank",F.col("pos")+1) # 1-based position of the author within the authors array
                  .drop("pos")
                  .filter(F.col("AuthorID").isNotNull())
                  .filter(F.col("AuthorName").isNotNull())
                  .withColumn("FirstName", F.element_at(name_parts,1) )  # first space-separated token
                  .withColumn("LastName", F.element_at(name_parts,-1) )  # last space-separated token
                  # MiddleName is whatever lies between the first and the last token; for names of one or
                  # two tokens the computed length is negative (presumably giving an empty string - confirm)
                  .withColumn("MiddleName", F.col('AuthorName').substr(F.length("FirstName")+2, F.length("AuthorName")-F.length("FirstName")-F.length("LastName")-2))
                  .drop("AuthorName"))
    return exploded

### Step 8. Last selection and augmentation

In [0]:
def getCountry(text): #gets country name from text
    """Find a country name (as listed by pycountry) mentioned in a text.

    A name only counts when it appears as a whole word, and when several
    names are found the longest one wins. This avoids reporting "Niger" for
    "Nigeria", "India" for "Indiana" or "Guinea" for "Papua New Guinea".

    Args:
        text: free text, e.g. an organization/affiliation string; may be None.

    Returns:
        The matching country name, or None when text is None or mentions no country.
    """
    import re

    if text is None:
        return None
    best = None
    for c in pycountry.countries:
        name = c.name
        if name not in text:  # cheap substring pre-check before the regex
            continue
        if best is not None and len(name) <= len(best):
            continue  # cannot beat the current match; on ties the first one found is kept
        if re.search(r'(?<!\w)' + re.escape(name) + r'(?!\w)', text):
            best = name
    return best

# Wrap getCountry as a Spark UDF. F.udf is used instead of a bare `udf`, a name this notebook never
# imports by name; StringType is spelled out and is also the default return type of udf.
getCountryUDF = F.udf(getCountry, StringType())

In [0]:
def selection2(data):
    """Add the organization country and reduce the data to the final column set.

    Empty strings in the result are replaced by nulls.
    """
    # when() without otherwise(): rows whose Organization is null get a null OrgCountry
    org_country = F.when(F.col("Organization").isNotNull(),getCountryUDF(F.col("Organization")))
    final_columns = [
        "PublicationID", "Title", "Year", "NbrOfCitations", "DOI", "Type",
        "Language", "FieldOfStudyID", "Keyword", "References",
        "VenueID", "VenueName", "VenueAbreviation",
        "AuthorID", "FirstName", "MiddleName", "LastName",
        "OrganizationID", "Organization", "OrgCountry",
        "AuthorRank",
    ]
    with_country = data.withColumn("OrgCountry",org_country)
    return with_country.select(*final_columns).replace("",None)

## T2. Gather Transformation Rules into one Process

In [0]:
def transformation_rules(filename,df_schema):
    """Read one source file and chain all transformation steps onto it.

    Returns the DataFrame produced by the last step.
    """
    steps = (
        selection1,  # step 1: first selection
        drop1,       # step 2: first high level drop
        filter1,     # step 3: main filters
        augmenter1,  # step 4: get Type
        filter2,     # step 5: get FOS
        filter3,     # step 6: get main Keyword
        explosion,   # step 7: explode author data
        selection2,  # step 8: final selection and organization country
    )
    data = createDF(filename,df_schema)
    for step in steps:
        data = step(data)
    print("... transformation rules defined")
    return data

## T3. Finish the transformation in staging tables
1. Split data
2. Delete duplicates
3. Write into staging (`source*`) Delta tables

In [0]:
def create_source_rank(data):
    """Overwrite the Delta table "sourceAuthorRank" (partitioned by FieldOfStudyID) and run OPTIMIZE on it."""
    rank_columns = ["PublicationID", "AuthorID", "OrganizationID", "VenueID", "Type",
                    "Language", "Keyword", "FieldOfStudyID", "AuthorRank"]
    writer = (data
              .select(*rank_columns)
              .write.format("delta")
              .mode("overwrite")
              .option("overwriteSchema", "true")
              .partitionBy("FieldOfStudyID"))
    writer.saveAsTable("sourceAuthorRank")
    spark.sql("OPTIMIZE sourceAuthorRank")

In [0]:
def create_source_ref(data):
    """Overwrite the Delta table "sourceRef" with distinct (PublicationID, RefPublicationID) pairs.

    Each element of a publication's "References" array becomes one row.
    """
    with_refs = data.filter(F.col("References").isNotNull())
    pairs = with_refs.select("PublicationID", F.explode("References").alias("RefPublicationID"))
    writer = (pairs.dropDuplicates()
              .write.format("delta")
              .mode("overwrite")
              .option("overwriteSchema", "true"))
    writer.saveAsTable("sourceRef")

In [0]:
# Staging tables that are plain column projections of the transformed data:
# table name -> columns to select for that table.
# sourceAuthorRank and sourceRef are not listed here; create_source_rank and create_source_ref write them.
sourceTables = {
    "sourcePublication": ["PublicationID", "Title", "DOI", "Year", "NbrOfCitations"],
    "sourceAuthor": ["AuthorID", "FirstName", "MiddleName", "LastName"],
    "sourceOrganization": ["OrganizationID", "Organization", "OrgCountry"],
    "sourceVenue": ["VenueID", "VenueName", "VenueAbreviation"],
    "sourceType": ["Type"],
    "sourceLanguage": ["Language"],
    "sourceKeyword": ["Keyword"],
}

def create_source_tables(data, sourceTables):
    """Write every staging table derived from the transformed data.

    Each entry of sourceTables (table name -> column list) is written as a
    de-duplicated projection, overwriting the Delta table of that name;
    afterwards the author-rank and reference tables are written.
    """
    for tableName, columns in sourceTables.items():
        projection = data.select(columns).dropDuplicates()
        writer = projection.write.format("delta").mode("overwrite").option("overwriteSchema", "true")
        writer.saveAsTable(tableName)
        print("... ", tableName, " ready")
    create_source_rank(data)
    print("... sourceAuthorRank ready")
    create_source_ref(data)
    print("... sourceReferences ready")

## L1. Create loading scripts

##### Example:

```
CREATE TABLE IF NOT EXISTS publication (
ID BIGINT GENERATED ALWAYS AS IDENTITY (INCREMENT BY 1),
PublicationID STRING,
Title STRING,
DOI STRING,
Year INT,
NbrOfCitations INT
) USING DELTA LOCATION 'dbfs:/tmp/data/warehouse/publication';

MERGE INTO publication tgt
USING sourcePublication src
ON tgt.PublicationID = src.PublicationID
AND tgt.DOI = src.DOI
WHEN NOT MATCHED THEN INSERT (PublicationID, Title, DOI, Year, NbrOfCitations)
VALUES (src.PublicationID, src.Title, src.DOI, src.Year, src.NbrOfCitations)
```

##### CREATE TABLE syntax: 
https://docs.databricks.com/spark/latest/spark-sql/language-manual/sql-ref-syntax-ddl-create-table-using.html#syntax

In [0]:
# Warehouse dimension tables: table name -> {column name: SQL type}.
delta_schema = {
    "publication": {
        "PublicationID": "STRING",
        "Title": "STRING",
        "DOI": "STRING",
        "Year": "INT",
        "NbrOfCitations": "INT",
    },
    "author": {
        "AuthorID": "STRING",
        "FirstName": "STRING",
        "MiddleName": "STRING",
        "LastName": "STRING",
    },
    "organization": {
        "OrganizationID": "STRING",
        "Organization": "STRING",
        "OrgCountry": "STRING",
    },
    "venue": {
        "VenueID": "STRING",
        "VenueName": "STRING",
        "VenueAbreviation": "STRING",
    },
    "type": {"Type": "STRING"},
    "language": {"Language": "STRING"},
    "keyword": {"Keyword": "STRING"},
    "fieldofstudies": {"FieldOfStudyID": "INT", "FieldOfStudy": "STRING"},
}

# Columns on which each warehouse table is matched against its source table when merging.
joining_cols = {
    "publication": ["PublicationID", "DOI"],
    "author": ["AuthorID"],
    "organization": ["OrganizationID"],
    "venue": ["VenueID"],
    "type": ["Type"],
    "language": ["Language"],
    "keyword": ["Keyword"],
    "fieldofstudies": ["FieldOfStudyID"],
}

def create_create_queries(delta_schema):
    """Build the CREATE TABLE statements for the warehouse.

    For every table in delta_schema one statement is generated, with an
    identity column "ID" followed by the declared columns; the fixed
    statements for the "authorrank" and "references" tables are appended.

    Returns:
        List of SQL strings, dimension tables first.
    """
    identity_col = "ID BIGINT GENERATED ALWAYS AS IDENTITY (INCREMENT BY 1)"
    queries_create = []
    for table_name, columns in delta_schema.items():
        column_defs = [identity_col] + [f"{col} {col_type}" for col, col_type in columns.items()]
        queries_create.append(
            "CREATE TABLE IF NOT EXISTS " + table_name + " (" + ", ".join(column_defs)
            + ") USING DELTA LOCATION '/tmp/data/warehouse/" + table_name + "';"
        )
    # fact table
    queries_create.append("""
    CREATE TABLE IF NOT EXISTS authorrank (
    PublicationID INT, AuthorID INT, OrganizationID INT, VenueID INT, TypeID INT, 
    LanguageID INT, KeywordID INT, FieldOfStudyID INT, AuthorRank INT)
    USING DELTA LOCATION '/tmp/data/warehouse/authorrank'
    PARTITIONED BY (FieldOfStudyID);""")
    # publication-to-publication references
    queries_create.append("""
    CREATE TABLE IF NOT EXISTS references (
    PublicationID INT, RefPublicationID INT)
    USING DELTA LOCATION '/tmp/data/warehouse/references';""")
    return queries_create

##### MERGE INTO syntax:
https://docs.databricks.com/delta/quick-start.html#create-a-table

In [0]:
def create_merge_queries(delta_schema,joining_cols):
    """Build the MERGE statements that load the staging tables into the warehouse.

    For every table in delta_schema a statement is generated that inserts the
    rows of "source<table>" which have no match in the target on the columns
    listed in joining_cols; the fixed statements for "authorrank" and
    "references" are appended.

    All match conditions use the null-safe operator <=>. With a plain "="
    a row whose key column is NULL never matches, so it would be inserted
    again on every run.

    Args:
        delta_schema: table name -> {column name: SQL type}.
        joining_cols: table name -> list of columns to match on.

    Returns:
        List of SQL strings, dimension tables first.
    """
    queries_merge = []
    for table_name, columns in delta_schema.items():
        on_clause = " AND ".join(f"tgt.{col} <=> src.{col}" for col in joining_cols[table_name])
        insert_cols = ", ".join(columns)
        insert_vals = ", ".join(f"src.{col}" for col in columns)
        queries_merge.append(
            f"MERGE INTO {table_name} tgt USING source{table_name} src ON {on_clause}"
            f" WHEN NOT MATCHED THEN INSERT ({insert_cols}) VALUES ({insert_vals});"
        )
    # Fact table: natural keys from the staging table are replaced by the surrogate IDs of the dimensions.
    query_merge_fact = """
    MERGE INTO authorrank tgt
    USING ( SELECT publ.ID PublicationID, auth.ID AuthorID, org.ID OrganizationID, ven.ID VenueID, typ.ID TypeID, lang.ID LanguageID, 
            kw.ID KeywordID, fos.ID FieldOfStudyID, rank.AuthorRank AuthorRank
            FROM sourceAuthorRank rank
            LEFT JOIN publication publ ON publ.PublicationID = rank.PublicationID LEFT JOIN author auth ON auth.AuthorID = rank.AuthorID
            LEFT JOIN organization org ON org.OrganizationID = rank.OrganizationID LEFT JOIN venue ven ON ven.VenueID = rank.VenueID
            LEFT JOIN type typ ON typ.Type = rank.Type LEFT JOIN language lang ON lang.Language = rank.Language 
            LEFT JOIN keyword kw ON kw.Keyword = rank.Keyword LEFT JOIN fieldofstudies fos ON fos.FieldOfStudyID = rank.FieldOfStudyID 
            WHERE publ.ID is not null AND auth.ID is not null 
          ) src
    ON tgt.PublicationID <=> src.PublicationID AND tgt.AuthorID <=> src.AuthorID 
    AND tgt.OrganizationID <=> src.OrganizationID AND tgt.VenueID <=> src.VenueID
    AND tgt.FieldOfStudyID <=> src.FieldOfStudyID
    WHEN NOT MATCHED THEN INSERT (PublicationID, AuthorID, OrganizationID, VenueID, TypeID, LanguageID, KeywordID, FieldOfStudyID, AuthorRank) 
    VALUES (src.PublicationID, src.AuthorID, src.OrganizationID, src.VenueID, src.TypeID, src.LanguageID, src.KeywordID, src.FieldOfStudyID, src.AuthorRank);
    """
    queries_merge.append(query_merge_fact)
    # References: DISTINCT collapses pairs that become identical once an ID is not found in publication (NULL).
    query_merge_ref = """
    MERGE INTO references tgt
    USING ( SELECT DISTINCT publ.ID PublicationID, publr.ID RefPublicationID
            FROM sourceRef ref
            LEFT JOIN publication publ ON publ.PublicationID = ref.PublicationID
            LEFT JOIN publication publr ON publr.PublicationID = ref.RefPublicationID
          ) src
    ON tgt.PublicationID <=> src.PublicationID AND tgt.RefPublicationID <=> src.RefPublicationID
    WHEN NOT MATCHED THEN INSERT (PublicationID, RefPublicationID)
    VALUES (src.PublicationID, src.RefPublicationID); 
    """
    queries_merge.append(query_merge_ref)
    return queries_merge

## L2. Populate DW

In [0]:
def populate_warehouse(delta_schema,joining_cols):
    """Create the warehouse tables if needed, then merge the staging data into them.

    All CREATE statements are executed first, then all MERGE statements; a
    progress line is printed after each one.
    """
    queries_create = create_create_queries(delta_schema)
    queries_merge = create_merge_queries(delta_schema,joining_cols)
    # (statements, index of the table name among the space-separated tokens, progress word)
    phases = (
        (queries_create, 5, " created"),     # CREATE TABLE IF NOT EXISTS <name> ...
        (queries_merge, 2, " populated"),    # MERGE INTO <name> ...
    )
    for statements, name_pos, outcome in phases:
        for query in statements:
            spark.sql(query)
            print("... table ", query.strip().split(" ")[name_pos].strip(), outcome)

## Transform & Load

In [0]:
def transform_and_load(fileName, df_schema, sourceTables, delta_schema, joining_cols):
    """Run the whole pipeline for one source file and print timings.

    Args:
        fileName: name of the JSON file inside 'dbfs:/tmp/data/source/'.
        df_schema: Spark schema used to read the file.
        sourceTables: staging table name -> column list.
        delta_schema: warehouse table name -> {column name: SQL type}.
        joining_cols: warehouse table name -> list of merge key columns.
    """
    print(f"Transform the file {fileName} ... ")
    start = time.time()
    folder = 'dbfs:/tmp/data/source/'
    # The transformed DataFrame feeds several separate actions (the count and one write per staging
    # table). Without caching, each action re-runs the whole lazy plan, including the per-row
    # Crossref lookup, so the result is cached for the duration of the staging writes.
    data = transformation_rules(folder+fileName,df_schema).cache()
    try:
        print("Nbr of Records: ", data.count())
        create_source_tables(data, sourceTables)
    finally:
        data.unpersist()  # release the cache even if a write fails
    print("Transformation time: ",time.time()-start)

    print("Load data to DW ... ")
    start = time.time()
    populate_warehouse(delta_schema,joining_cols)
    print("Loading time: ",time.time()-start)

## EXECUTE!

In [0]:
# Process the input files one by one: stage the file on DBFS, transform & load it, then remove the staged copy.
# Notebook P1.1 has to be executed before this cell.
for nbr in range(10,100):
    file_name = f"dblpv13i_{nbr}.json"
    staged_path = f"dbfs:/tmp/data/source/{file_name}"  # the folder transform_and_load reads from
    dbutils.fs.cp(f"file:/databricks/driver/data/{file_name}", staged_path)
    transform_and_load(file_name, df_schema, sourceTables, delta_schema, joining_cols)
    # Delete exactly the file that was staged above. The path used here before pointed at
    # ".../sources/dblpv13_<nbr>.json" (wrong folder and file name), so nothing was ever removed.
    dbutils.fs.rm(staged_path) # when warehouse populated with information in file, then delete

Transform the file dblpv13i_10.json ... 
... transformation rules defined
Nbr of Records:  325
...  sourcePublication  ready
...  sourceAuthor  ready
...  sourceOrganization  ready
...  sourceVenue  ready
...  sourceType  ready
...  sourceLanguage  ready
...  sourceKeyword  ready
... sourceAuthorRank ready
... sourceReferences ready
Transformation time:  683.2719354629517
Load data to DW ... 
... table  publication  created
... table  author  created
... table  organization  created
... table  venue  created
... table  type  created
... table  language  created
... table  keyword  created
... table  fieldofstudies  created
... table  authorrank  created
... table  references  created
... table  publication  populated
... table  author  populated
... table  organization  populated
... table  venue  populated
... table  type  populated
... table  language  populated
... table  keyword  populated
... table  fieldofstudies  populated
... table  authorrank  populated
... table  references  

In [0]:
%sql show tables

database,tableName,isTemporary
default,sourceauthor,False
default,sourceauthorrank,False
default,sourcefieldofstudies,False
default,sourcekeyword,False
default,sourcelanguage,False
default,sourceorganization,False
default,sourcepublication,False
default,sourceref,False
default,sourcetype,False
default,sourcevenue,False


## Streaming files

In [0]:
#df_stream = (spark.readStream
#             .format("json")
#             .schema(df_schema)
#             .option("maxFilesPerTrigger", 10)
#             .load("dbfs:/tmp/data/source/"))



In [0]:
#display(df_stream)



In [0]:
#for stream in spark.streams.active:
#    stream.stop()



In [0]:
# Inspect one raw input file exactly as it is read with df_schema (before any transformation step).
raw_sample = createDF('file:/databricks/driver/data/dblpv13i_3.json',df_schema)
display(raw_sample)

_id,abstract,authors,doi,fos,isbn,issn,issue,keywords,lang,n_citation,page_end,page_start,pdf,references,title,url,venue,volume,year
53e9978ab7602d9701f4b2c8,,"List(List(53f45ed5dabfaee02ad774e1, null, null, null, Ayanna Howard, null, null, null, null, null, null, null, null, null))",,,,,,List(),en,0.0,,,,,Organizers.,"List(http://www.aaai.org/ocs/index.php/WS/ROBOT10/paper/view/2374, db/conf/aaai/middle2010.html#Howard10)","List(53a725ac20f7420be8b556b9, null, null, null, null, null, null, Enabling Intelligence through Middleware, null, null, null, null, null)",,2010
53e9978ab7602d9701f4b2c9,"New and improved data hiding techniques pose a problem for forensic analyst investigating computer crime. Computer criminals are able to hide information using stego-channels available in commonly used document formats, thereby hindering an investigator from acquiring possible important evidence. In this paper, we focus on detecting the use of stego-channels in the unused or dead space regions in the Object Linking and Embedding 2 (OLE2) specification used primarily by Microsoft's Office. The OleDetection algorithm [19] presented in this paper is focused on detecting the use of these stego-channels using a three-step process comprising the detection of dead regions in a document, the extraction of binary data and the generation of appropriate statistics using kurtosis and byte-frequency distribution, and the comparison of the calculated statistics with threshold values, which determines whether or not the document contains hidden data. This algorithm extends the work done by the StegOle algorithm [3]. Our experimental results shows that the OleDetection algorithm can correctly identify 99.97 percent of document with previous stego-channel techniques with a flase positive rate of only 0.65 percent. In addition, we present an anti-forensic techniques wherein OLE2 documents can be modified to hide data with greater detection avoidance characteristics [19]; thus reducing the accuracy of the current OleDetection implementation.","List(List(54857adadabfaed7b5fa2277, null, Robert.Erbacher@usu.edu, null, Robert F. 
Erbacher, null, null, null, null, Utah State Univ, Dept Comp Sci, Logan, UT 84322 USA, null, null, List(Utah State Univ, Dept Comp Sci, Logan, UT 84322 USA), 1081170), List(53f43551dabfaeecd695611b, null, null, null, Jason Daniels, null, null, null, null, Utah State Univ, Dept Comp Sci, Logan, UT 84322 USA, null, null, List(Utah State Univ, Dept Comp Sci, Logan, UT 84322 USA), 34489772), List(53f46827dabfaee4dc855145, null, null, null, Steena Dominica Steven Monteiro, null, null, null, null, Utah State Univ, Dept Comp Sci, Logan, UT 84322 USA, null, null, List(Utah State Univ, Dept Comp Sci, Logan, UT 84322 USA), 5203193))",10.1109/SADFE.2009.18,,,,,"List(Forensics, Anti-Forensics, steganography, Covert Channels, OLE2)",en,0.0,96,85,,,OleDetection,"List(http://doi.ieeecomputersociety.org/10.1109/SADFE.2009.18, http://www.webofknowledge.com/)","List(null, null, null, null, null, null, null, International Workshop on Systematic Approaches to Digital Forensic Engineering SADFE, null, International Workshop on Systematic Approaches to Digital Forensic Engineering SADFE, null, J, null)",,2009
53e9978ab7602d9701f4b2ca,,"List(List(53f42bd2dabfaedd74d1ef8e, null, null, null, Klaus Marquardt, null, null, null, null, null, null, null, null, null))",,,,,,List(),en,0.0,478,459,,,Overthreading.,List(http://hillside.net/europlop/europlop2007/workshops/G2.pdf),"List(5550371b7cea80f954173a4b, null, null, null, null, null, null, EuroPLoP, null, null, null, null, 0)",,2007
53e9978ab7602d9701f4b2cb,,"List(List(53f445a6dabfaec09f1c665c, null, null, null, Christoph Benzmüller, null, null, null, null, null, null, null, null, 976236), List(54876524dabfae8a11fb38d7, null, null, null, Armin Fiedler, null, null, null, null, null, null, null, null, 1743107), List(54083194dabfae44f086dd1e, null, null, null, Andreas Meier, null, null, null, null, null, null, null, null, 282170), List(53f4750edabfaee43ed3004e, null, null, null, Martin Pollet, null, null, null, null, null, null, null, null, 2115104), List(5484e43edabfae9b401331fd, null, null, null, Jörg H. Siekmann, null, null, null, null, null, null, null, null, 380199))",10.1007/11542384_17,,,,,List(),en,1.0,141,127,,,Omega,List(http://dx.doi.org/10.1007/11542384_17),"List(53a727a920f7420be8b9734c, null, null, null, null, null, null, The Seventeen Provers of the World, null, null, null, null, 0)",,2006
53e9978ab7602d9701f4b2cf,,"List(List(53f4ce36dabfaeed23f81401, null, null, null, Klaus Benecke, null, null, null, null, null, null, null, null, 2076622), List(53f46b63dabfaedf43659566, null, null, null, Martin Schnabel, null, null, null, null, null, null, null, null, 9272483))",,,,,,List(),en,0.0,583,580,,,OttoQL,List(http://subs.emis.de/LNI/Proceedings/Proceedings144/article5245.html),"List(555037657cea80f954184ecd, null, null, null, null, null, null, BTW, null, null, null, null, 0)",,2009
53e9978ab7602d9701f4afd6,Without Abstract,"List(List(54843e4adabfae8a11fb1f8a, null, null, null, John L. Pollock, null, null, null, null, null, null, null, null, null))",10.1007/3-540-52885-7_134,,0-387-52885-7,,,List(),en,13.0,670,669,,,OSCAR,List(http://dx.doi.org/10.1007/3-540-52885-7_134),"List(53a72b1320f7420be8c18a7b, null, null, null, null, null, null, CADE, null, null, null, null, null)",,1990
53e9978ab7602d9701f4afda,,"List(List(53f4c9cddabfaee9c5f80907, null, null, null, J. L. Adams, null, null, null, null, null, null, null, null, null))",10.1016/0169-7552(94)90044-2,,,,6-8,List(),en,12.0,784,771,,,Orwell,List(http://dx.doi.org/10.1016/0169-7552(94)90044-2),"List(53a72fc720f7420be8cc3076, null, null, null, null, null, null, Computer Networks and ISDN Systems, null, null, null, null, 0)",26,1994
53e9978ab7602d9701f4afde,,"List(List(53f473c5dabfaedd74e9ea0a, null, null, 5b86b41be1cd8e14a3393ebd, Joshua Cordes, null, null, null, null, School Of Visual Arts, 6 Leland Court, Centereach, New York, null, 5f71b2931c455f439fe3ccf2, null, null))",10.1145/259081.259453,,0-89791-921-1,,,List(),en,0.0,281,281,,,Once,"List(http://dx.doi.org/10.1145/259081.259453, http://doi.acm.org/10.1145/259081.259453)","List(null, null, null, null, null, null, null, SIGGRAPH Visual Proceedings, null, null, null, null, 10)",,1997
53e9978ab7602d9701f4afdf,,"List(List(53f430b8dabfaeb1a7bb5895, null, null, null, Zak Margolis, null, null, null, null, null, null, null, null, null))",10.1145/281388.281951,,1-58113-045-7,1756-1833,7172,List(),en,0.0,1563B,1563B,,,Orifices,"List(http://dx.doi.org/10.1145/281388.281951, http://www.ncbi.nlm.nih.gov/pubmed/9836658?report=xml&format=text, http://doi.acm.org/10.1145/281388.281951)","List(5736ae3ad39c4f40a797600a, null, null, null, null, null, null, SIGGRAPH Electronic Art and Animation Catalog, null, null, null, null, 10)",317,1998
53e9978ab7602d9701f4afe0,(Am J Obstet Gynecol 1997;176:s225-6.),"List(List(53f44af3dabfaeecd69bf288, null, null, null, Linda Rising, null, null, null, null, null, null, null, null, null))",10.1109/SRII.2011.6,,0-521-64818-1,Metabolism,27,"List(onc, oncogenes, cancer, apoptosis, tumor suppressor genes, tumor viruses, molecular oncology, cell cycle, growth factors, growth factor receptors, growth regulatory genes, kidney, renal, nephrology, dialysis, hypertension, urology, transplantation, diabetes, clinico-pathological, KI, nature journals, nature publishing group, International Society of Nephrology, ISN)",en,0.0,,,//static.aminer.org/pdf/PDF/003/079/374/overview.pdf,,Overview,"List(http://dx.doi.org/10.1109/SRII.2011.6, http://doi.acm.org/10.1145/266231.266233, http://dl.acm.org/citation.cfm?id=1860924.1860926&coll=DL&dl=GUIDE&CFID=686618441&CFTOKEN=83291212&preflayout=flat, http://dl.acm.org/citation.cfm?id=2376369.2377182&coll=DL&dl=GUIDE&CFID=521782820&CFTOKEN=47736761&preflayout=flat, http://dl.acm.org/citation.cfm?id=2015552.2015757&coll=DL&dl=GUIDE&CFID=521930607&CFTOKEN=12202095&preflayout=flat, http://dx.doi.org/doi:10.1038/sj.onc.1209546, http://www.nature.com/onc/journal/v25/n27/full/1209546a.html, http://dx.doi.org/doi:10.1046/j.1523-1755.1998.06802.x, http://www.nature.com/ki/journal/v54/n68s/full/4490537a.html)","List(555036f67cea80f95416aa04, null, null, null, null, null, null, ACM StandardView, null, null, null, null, 0)",52,2011
