# Simple Zinc Interaction Analysis Example

<img src="./figures/zinc_interaction.png" style="width: 300px;"/>

## Imports

In [1]:
from pyspark import SparkConf, SparkContext
from mmtfPyspark.datasets import groupInteractionExtractor
from mmtfPyspark.io import MmtfReader
from mmtfPyspark.webfilters import blastCluster

## Configure Spark

In [2]:
# Run Spark in local mode ("local[*]" asks for as many worker threads as
# there are logical cores) under a recognizable application name.
conf = (
    SparkConf()
    .setMaster("local[*]")
    .setAppName("simpleZincInteractionDemo")
)

# getOrCreate returns the already-running context when one exists, so
# re-executing this cell does not fail with
# "Cannot run multiple SparkContexts at once"; on a fresh kernel it
# creates a new context from `conf`, exactly like SparkContext(conf=conf).
sc = SparkContext.getOrCreate(conf=conf)

## Read PDB in MMTF format

In [3]:
# Relative path to the MMTF sample data that ships alongside these notebooks.
path = "../../resources/mmtf_full_sample/"

# Read the structures found at `path` using the SparkContext `sc`; the result
# is bound to `pdb` and is the input for the cells that follow.
pdb = MmtfReader.readSequenceFile(path, sc)

## Use only representative structures

In [4]:
# Threshold handed to the blastCluster filter
# (presumably a percent sequence identity -- TODO confirm against mmtfPyspark docs).
sequenceIdentity = 40

# Keep only the structures accepted by the filter. `pdb` is rebound here, so
# later cells see the filtered set rather than everything that was read.
pdb = pdb.filter(blastCluster(sequenceIdentity))

## Extract proteins with Zn interactions

In [5]:
# Extractor for interactions involving the "ZN" group. The second argument (3)
# is presumably the distance cutoff in Angstroms -- TODO confirm against the
# groupInteractionExtractor documentation.
finder = groupInteractionExtractor("ZN", 3)

# Build the interaction dataset from the filtered structures and cache it,
# because the cells below query it more than once.
interactions = (
    finder
    .getDataset(pdb)
    .cache()
)

## Show the schema and the first 20 Zn interactions

In [6]:
# Print the column names and types of the interaction dataset.
interactions.printSchema()

# Preview up to 20 rows of the dataset.
interactions.show(20)

# Report the total number of rows. `interactions` was cached when it was
# created, so this count and the later queries can reuse the stored result.
print(f"Number of interactions: {interactions.count()}")

root
 |-- structureId: string (nullable = false)
 |-- residue1: string (nullable = false)
 |-- atom1: string (nullable = false)
 |-- element1: string (nullable = false)
 |-- index1: integer (nullable = false)
 |-- residue2: string (nullable = false)
 |-- atom2: string (nullable = false)
 |-- element2: string (nullable = false)
 |-- index2: integer (nullable = false)
 |-- distance: float (nullable = false)

+-----------+--------+-----+--------+------+--------+-----+--------+------+---------+
|structureId|residue1|atom1|element1|index1|residue2|atom2|element2|index2| distance|
+-----------+--------+-----+--------+------+--------+-----+--------+------+---------+
|       5B4E|      ZN|   ZN|      Zn|   303|     CYS|   SG|       S|     1|2.1902573|
|       5B4E|      ZN|   ZN|      Zn|   303|     CYS|   SG|       S|     4| 2.206233|
|       5B4E|      ZN|   ZN|      Zn|   303|     CYS|   SG|       S|    20|2.2135682|
|       5B4E|      ZN|   ZN|      Zn|   303|     HIS|  ND1|       N|    23

## Show the top 10 interacting groups

In [9]:
# Count the interactions per partner group (the `residue2` column), order the
# groups from most to least frequent, and print the ten most common ones.
(
    interactions
    .groupBy("residue2")
    .count()
    .sort("count", ascending=False)
    .show(10)
)

+--------+-----+
|residue2|count|
+--------+-----+
|     CYS|67046|
|     HIS|66364|
|     ASP|24930|
|     GLU|18801|
|     HOH|14733|
|     ACT| 1186|
|     LYS|  921|
|     KCX|  794|
|      CL|  753|
|     PO4|  687|
+--------+-----+
only showing top 10 rows



## Terminate Spark

In [None]:
# Stop the SparkContext now that the analysis is finished.
sc.stop()