In [1]:
!pip install py-cdhit
from pycdhit import cd_hit, read_clstr

Collecting py-cdhit
  Downloading py_cdhit-1.0.0-py3-none-any.whl (20 kB)
Installing collected packages: py-cdhit
Successfully installed py-cdhit-1.0.0


In [2]:
!git clone https://github.com/Yichuan0712/MUTargetCLEAN0513.git

Cloning into 'MUTargetCLEAN0513'...
remote: Enumerating objects: 4439, done.[K
remote: Counting objects: 100% (979/979), done.[K
remote: Compressing objects: 100% (329/329), done.[K
remote: Total 4439 (delta 655), reused 972 (delta 648), pack-reused 3460[K
Receiving objects: 100% (4439/4439), 33.14 MiB | 17.78 MiB/s, done.
Resolving deltas: 100% (2876/2876), done.


In [3]:
import numpy as np
import pandas as pd

def split_protein_sequence(prot_id, sequence, targets):
    fragment_length = 1024 - 2  # configs.encoder.max_len - 2
    overlap = 200 #  configs.encoder.frag_overlap
    fragments = []
    target_frags = []
    id_frags = []
    sequence_length = len(sequence)
    start = 0
    ind = 0

    while start < sequence_length:
        end = start + fragment_length
        if end > sequence_length:
            end = sequence_length
        fragment = sequence[start:end]
        target_frag = targets[:, start:end]
        if target_frag.shape[1] < fragment_length:
            pad = np.zeros([targets.shape[0], fragment_length-target_frag.shape[1]])
            target_frag = np.concatenate((target_frag, pad), axis=1)
        target_frags.append(target_frag)
        fragments.append(fragment)
        id_frags.append(prot_id+"@"+str(ind))
        ind += 1
        if start + fragment_length > sequence_length:
            break
        start += fragment_length - overlap

    return id_frags, fragments, target_frags

def prepare_samples(csv_file):
    # label2idx = {"Nucleus":0, "ER":1, "Peroxisome":2, "Mitochondrion":3, "Nucleus_export":4,
    #              "dual":5, "SIGNAL":6, "chloroplast":7, "Thylakoid":8}
    label2idx = {"Nucleus": 0, "ER": 1, "Peroxisome": 2, "Mitochondrion": 3, "Nucleus_export": 4,
                 "SIGNAL": 5, "chloroplast": 6, "Thylakoid": 7}
    samples = []
    n = 8 # configs.encoder.num_classes
    df = pd.read_csv(csv_file)
    row, col = df.shape
    for i in range(row):
        prot_id = df.loc[i, "Entry"]
        seq = df.loc[i, "Sequence"]
        targets = np.zeros([n, len(seq)])
        type_protein = np.zeros(n)
        # motifs = df.iloc[i,1:-2]
        motifs = df.loc[i, "MOTIF"].split("|")
        for motif in motifs:
            if not pd.isnull(motif):
                # label = motif.split("|")[0].split(":")[1]
                label = motif.split(":")[1]
                # motif_left = motif.split("|")[0].split(":")[0].split("-")[0]
                motif_left = motif.split(":")[0].split("-")[0]
                motif_right = motif.split(":")[0].split("-")[1]

                motif_left, motif_right, type_protein, targets = fix_sample(motif_left, motif_right, label, label2idx, type_protein, targets)
                if label in label2idx:
                    index_row = label2idx[label]
                    type_protein[index_row] = 1
                    if label in ["SIGNAL", "chloroplast", "Thylakoid", "Mitochondrion"]:
                        targets[index_row, motif_right-1] = 1
                    elif label == "Peroxisome" and motif_left == 0:
                        targets[index_row, motif_right-1] = 1
                    elif label == "Peroxisome" and motif_left != 0:
                        targets[index_row, motif_left] = 1
                    elif label == "ER":
                        targets[index_row, motif_left] = 1
                    elif label == "Nucleus" or label == "Nucleus_export":
                        targets[index_row, motif_left:motif_right] = 1

        id_frag_list, seq_frag_list, target_frag_list = split_protein_sequence(prot_id, seq, targets)
        samples.append((prot_id, id_frag_list, seq_frag_list, target_frag_list, type_protein))
        # for j in range(len(fragments)):
        #     id=prot_id+"@"+str(j)
        #     samples.append((id, fragments[j], target_frags[j], type_protein))
    return samples

def fix_sample(motif_left, motif_right, label, label2idx, type_protein, targets):
    if motif_left == "None":
        motif_left = 0
    else:
        motif_left = int(motif_left)-1
    motif_right = int(motif_right)
    if label == "Thylakoid" and motif_left != 0:
        index_row = label2idx["chloroplast"]
        type_protein[index_row] = 1
        targets[index_row, motif_left-1] = 1
    return motif_left, motif_right, type_protein, targets

In [4]:
samples = prepare_samples("/content/MUTargetCLEAN0513/parsed_EC7_v2/PLANTS_uniprot.csv")
samples.extend(prepare_samples("/content/MUTargetCLEAN0513/parsed_EC7_v2/ANIMALS_uniprot.csv"))
samples.extend(prepare_samples("/content/MUTargetCLEAN0513/parsed_EC7_v2/FUNGI_uniprot.csv"))
cv = pd.read_csv("/content/MUTargetCLEAN0513/parsed_EC7_v2/split/type/partition.csv")

In [5]:
samples_list = []
for i in range(len(samples)):
    if len(samples[i][1]) == 1:
        samples_list.append([samples[i][1][0], samples[i][2][0]])
    else:
        for j in range(len(samples[i][1])):
            samples_list.append([samples[i][1][j], samples[i][2][j]])
for i in samples_list:
    # if len(i[1]) != 1:
        print(i)
print(len(samples_list))
print(len(samples))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
['O80982@0', 'MEELKVEMEEETVTFTGSVAASSSVGSSSSPRPMEGLNETGPPPFLTKTYEMVEDPATDTVVSWSNGRNSFVVWDSHKFSTTLLPRYFKHSNFSSFIRQLNTYGFRKIDPDRWEFANEGFLAGQKHLLKNIKRRRNMGLQNVNQQGSGMSCVEVGQYGFDGEVERLKRDHGVLVAEVVRLRQQQHSSKSQVAAMEQRLLVTEKRQQQMMTFLAKALNNPNFVQQFAVMSKEKKSLFGLDVGRKRRLTSTPSLGTMEENLLHDQEFDRMKDDMEMLFAAAIDDEANNSMPTKEEQCLEAMNVMMRDGNLEAALDVKVEDLVGSPLDWDSQDLHDMVDQMGFLGSEP']
['O81001@0', 'MDLGVRVSGHETVSSPGQTELGSGFSNKQERSGFDGEDCWRSSKLSRTSTDGFSSSPASAKTLSFHQGIPLLRSTTINDPRKGQEHMLSFSSASGKSDVSPYLQYCRNSGYGLGGMMNTSNMHGNLLTGVKGPFSLTQWAELEQQALIYKYITANVPVPSSLLLSLKKSFFPYGSLPPNSFGWGSFHLGFSGGNMDPEPGRCRRTDGKKWRCSRDAVPDQKYCERHINRGRHRSRKPVEGQNGHNTNAAAAASAAAASTAAAVSKAAAGTSAVAMRGSDNNNSLAAAVGTQHHTNNQSTDSLANRVQNSRGASVFPATMNLQSKETHPKQSNNPFEFGLISSDSLLNPSHKQASYATSSKGFGSYLDFGNQAKHAGNHNNVDSWPEELKSDWTQLSMSIPMAPSSPVQDKLALSPLRLSREFDPAIHMGLGVNTEFLDPGKKTNNWIPISWGNNNSMGGPLGEVLNSTTNSPKFGSSPTGVLQKSTFGSLSNSSSASSTIIGDNNNKNGDGKDPLGPTTLMNTSATAPSL']
['O82312@0', 'MVSTQQRTDDDS

In [6]:
with open('samples_in.fa', 'w') as f:
    for item in samples_list:
        f.write(f'>{item[0]}\n{item[1]}\n')

In [7]:
!wget https://github.com/weizhongli/cdhit/releases/download/V4.8.1/cd-hit-v4.8.1-2019-0228.tar.gz
!tar -xf cd-hit-v4.8.1-2019-0228.tar.gz

--2024-07-14 02:36:30--  https://github.com/weizhongli/cdhit/releases/download/V4.8.1/cd-hit-v4.8.1-2019-0228.tar.gz
Resolving github.com (github.com)... 140.82.116.4
Connecting to github.com (github.com)|140.82.116.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/35050301/216f6a00-3b6b-11e9-9fec-85005717b86a?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20240714%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240714T023630Z&X-Amz-Expires=300&X-Amz-Signature=73ffb7ce6d96e7e23d2bee6ca67484843b319391aca105a93463bb480647e61b&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=35050301&response-content-disposition=attachment%3B%20filename%3Dcd-hit-v4.8.1-2019-0228.tar.gz&response-content-type=application%2Foctet-stream [following]
--2024-07-14 02:36:30--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/35050301/216f6a00-3b6b-11

In [8]:
import os
os.chdir("/content/cd-hit-v4.8.1-2019-0228")
!make -j 4
os.chdir("/content")

g++  -fopenmp -DWITH_ZLIB -O2  cdhit-common.c++ -c
g++  -fopenmp -DWITH_ZLIB -O2  cdhit-utility.c++ -c
g++  -fopenmp -DWITH_ZLIB -O2  cdhit.c++ -c
g++  -fopenmp -DWITH_ZLIB -O2  cdhit-est.c++ -c
g++  -fopenmp -DWITH_ZLIB -O2  cdhit-2d.c++ -c
g++  -fopenmp -DWITH_ZLIB -O2  cdhit-est-2d.c++ -c
g++  -fopenmp -DWITH_ZLIB -O2  cdhit-div.c++ -c
g++  -fopenmp -DWITH_ZLIB -O2  cdhit-454.c++ -c
In file included from [01m[Kcdhit-common.c++:28[m[K:
In member function ‘[01m[Kvoid NVector<TYPE>::Resize(int, const TYPE&) [with TYPE = long int][m[K’,
    inlined from ‘[01m[KNVector<TYPE>::NVector(int, const TYPE&) [with TYPE = long int][m[K’ at [01m[Kcdhit-common.h:119:10[m[K,
    inlined from ‘[01m[Kint local_band_align(char*, char*, int, int, ScoreMatrix&, int&, int&, int&, float&, int*, int, int, int, WorkingBuffer&)[m[K’ at [01m[Kcdhit-common.c++:815:36[m[K:
  144 |                                 items = (TYPE*)[01;35m[Krealloc( items, capacity*sizeof(TYPE) )[m[K;
   

In [9]:
!/content/cd-hit-v4.8.1-2019-0228/cd-hit -i /content/samples_in.fa -o /content/samples_out.fa -c 0.9 -n 5 -aS 0.8 -G 0 -g 1

Program: CD-HIT, V4.8.1 (+OpenMP), Jul 14 2024, 02:36:32
Command: /content/cd-hit-v4.8.1-2019-0228/cd-hit -i
         /content/samples_in.fa -o /content/samples_out.fa -c
         0.9 -n 5 -aS 0.8 -G 0 -g 1

Started: Sun Jul 14 02:36:51 2024
                            Output                              
----------------------------------------------------------------
total seq: 5627
longest and shortest : 1022 and 28
Total letters: 2160389
Sequences have been sorted

Approximated minimal memory consumption:
Sequence        : 2M
Buffer          : 1 X 10M = 10M
Table           : 1 X 65M = 65M
Miscellaneous   : 0M
Total           : 79M

Table limit with the given memory limit:
Max number of representatives: 1147840
Max number of word counting entries: 90108598

comparing sequences from          0  to       5627
.....
     5627  finished       4817  clusters

Approximated maximum memory consumption: 93M
writing new database
writing clustering information
program completed !

Total CPU t

In [33]:
def read_fasta_ids(file_path):
    ids = []
    with open(file_path, 'r') as f:
        for line in f:
            if line.startswith('>'):
                ids.append(line[1:].strip())
    return ids

file_path = '/content/samples_out.fa'
ids_list = read_fasta_ids(file_path)
print(ids_list)
len(ids_list)

['O04073@0', 'O22609@0', 'O23403@0', 'O81103@0', 'O82660@0', 'P42699@0', 'P82281@0', 'P82538@0', 'Q0WRJ7@0', 'Q39249@0', 'Q42029@0', 'Q9ASS6@0', 'Q9LR64@0', 'Q9LU10@0', 'Q9LU86@0', 'Q9LXX5@0', 'Q9M158@0', 'Q9SCY2@0', 'Q9SEL7@0', 'Q9SM43@0', 'Q9SSA5@0', 'Q9XFT3@0', 'F4KHG6@0', 'O22160@0', 'O22870@0', 'O23614@0', 'O49292@0', 'P11490@0', 'P82715@0', 'P82869@0', 'Q40251@0', 'Q41387@0', 'Q9LM71@0', 'Q9LYR5@0', 'Q9ZVL6@0', 'O22773@0', 'O49939@0', 'P81760@0', 'Q6ETQ7@0', 'Q9S720@0', 'Q9SPI9@0', 'P82658@0', 'P85194@0', 'Q9LVV5@0', 'Q9SW33@0', 'Q84WW2@0', 'Q8GW43@0', 'Q8LF36@0', 'Q8LPS1@0', 'Q9FWA3@0', 'O64894@0', 'Q9SIP1@0', 'O04151@0', 'O04153@0', 'O04373@0', 'P54970@0', 'Q38858@0', 'Q6Z7B0@0', 'Q8H1B3@0', 'Q9C525@0', 'Q9FF55@0', 'Q9FGR9@0', 'Q9LKR3@0', 'Q9SE50@0', 'Q9SLY8@0', 'Q9SR37@0', 'Q9XI01@0', 'Q6Z058@0', 'P54969@0', 'P80284@0', 'Q9STL4@0', 'A8MR65@0', 'Q8LH59@0', 'Q8S4Q6@0', 'Q9LPZ4@0', 'A6Q0K5@0', 'A8J637@0', 'D0TZF0@0', 'F4IW47@0', 'I4DST8@0', 'O04121@0', 'O04616@0', 'O04921@0', 'O2

4817

In [34]:
ids_list_nosub = []
for i in ids_list:
    ids_list_nosub.append(i.split("@")[0])
print(ids_list_nosub)
len(ids_list_nosub)

['O04073', 'O22609', 'O23403', 'O81103', 'O82660', 'P42699', 'P82281', 'P82538', 'Q0WRJ7', 'Q39249', 'Q42029', 'Q9ASS6', 'Q9LR64', 'Q9LU10', 'Q9LU86', 'Q9LXX5', 'Q9M158', 'Q9SCY2', 'Q9SEL7', 'Q9SM43', 'Q9SSA5', 'Q9XFT3', 'F4KHG6', 'O22160', 'O22870', 'O23614', 'O49292', 'P11490', 'P82715', 'P82869', 'Q40251', 'Q41387', 'Q9LM71', 'Q9LYR5', 'Q9ZVL6', 'O22773', 'O49939', 'P81760', 'Q6ETQ7', 'Q9S720', 'Q9SPI9', 'P82658', 'P85194', 'Q9LVV5', 'Q9SW33', 'Q84WW2', 'Q8GW43', 'Q8LF36', 'Q8LPS1', 'Q9FWA3', 'O64894', 'Q9SIP1', 'O04151', 'O04153', 'O04373', 'P54970', 'Q38858', 'Q6Z7B0', 'Q8H1B3', 'Q9C525', 'Q9FF55', 'Q9FGR9', 'Q9LKR3', 'Q9SE50', 'Q9SLY8', 'Q9SR37', 'Q9XI01', 'Q6Z058', 'P54969', 'P80284', 'Q9STL4', 'A8MR65', 'Q8LH59', 'Q8S4Q6', 'Q9LPZ4', 'A6Q0K5', 'A8J637', 'D0TZF0', 'F4IW47', 'I4DST8', 'O04121', 'O04616', 'O04921', 'O20250', 'O22832', 'O23404', 'P00455', 'P04778', 'P07505', 'P07591', 'P08980', 'P09856', 'P11043', 'P11155', 'P12333', 'P14655', 'P15719', 'P16127', 'P17067', 'P19866',

4817

In [12]:
len(samples)

5283

In [13]:
samples[0]

('O04073',
 ['O04073@0'],
 ['MHSRTNCLQTSVRAPQPHFRPFTAVKTCRQRCSTTAAAAKRDQAQEQQPWIQVGLGLAAAATAVAVGLGAAALPAQAVTSEQLLFLEAWRAVDRAYVDKSFNGQSWFKLRETYLKKEPMDRRAQTYDAIRKLLAVLDDPFTRFLEPSRLAALRRGTAGSVTGVGLEITYDGGSGKDVVVLTPAPGGPAEKAGARAGDVIVTVDGTAVKGLSLYDVSDLLQGEADSQVEVVLHAPGAPSNTRTLQLTRQKVTINPVTFTTCSNVAAAALPPGAAKQQLGYVRLATFNSNTTAAAQQAFTELSKQGVAGLVLDIRNNGGGLFPAGVNVARMLVDRGDLVLIADSQGIRDIYSADGNSIDSATPLVVLVNRGTASASEVLAGALKDSKRGLIAGERTFGKGLIQTVVDLSDGSGVAVTVARYQTPAGVDINKIGVSPDVQLDPEVLPTDLEGVCRVLGSDAAPRLFG'],
 [array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]])],
 array([0., 0., 0., 0., 0., 0., 1., 1.]))

In [14]:
sample1 = []
for i in samples:
    if i[0] in ids_list_nosub:
        sample1.append(i)
print(len(sample1))

4560


In [15]:
sample2 = []
for i in samples:
    if any(elem in ids_list for elem in i[1]):
        sample2.append(i)
print(len(sample2))

4560


In [16]:
sample3 = []
for i in sample2:
    temp_id_list = []
    temp_seq_list = []
    temp_target_list = []
    for ii in i[1]:
        if ii in ids_list:
            temp_id_list.append(ii)
            temp_seq_list.append(i[2][i[1].index(ii)])
            temp_target_list.append(i[3][i[1].index(ii)])
        else:
            print(ii)
    temp = (i[0], temp_id_list, temp_seq_list, temp_target_list, i[4])
    sample3.append(temp)
print(len(sample3))

Q8VZ10@1
Q9SYI0@1
P26969@1
F4IDC2@1
Q2R2D5@1
Q6YUL8@1
Q9SSE9@1
F4K4D6@1
O80386@1
Q13315@3
Q6P5E4@1
P46934@1
Q6PIJ6@1
Q8BMI0@0
Q8CHI5@0
Q9Z207@1
F1LVW7@0
Q7KUT2@1
A2BH40@2
E9Q4N7@2
O14497@0
O14497@1
Q08211@0
Q28141@0
Q61194@1
Q61329@0
Q61329@1
Q61329@2
Q61329@3
Q9Y2G1@0
F1RRV3@3
O08710@3
P00450@1
P01267@3
P02465@1
P02466@0
P02751@2
P06756@1
P06882@0
P06882@2
P07589@0
P07589@1
P08123@0
P08514@1
P08648@1
P09470@1
P11654@2
P12821@1
P13612@1
P22063@1
P23631@1
P26006@1
P35555@3
P47820@0
P51840@1
Q01149@1
Q02846@1
Q61554@0
Q61554@1
Q61554@2
Q7SIB2@1
Q9NZV1@1
O35806@0
P0DJE3@0
P25365@1
P29064@1
4560


In [17]:
import pickle

with open('cdhit0.9.pkl', 'wb') as f:
    pickle.dump(sample1, f)


In [18]:
import pickle

with open('cdhit0.9_sub.pkl', 'wb') as f:
    pickle.dump(sample2, f)


.pkl is too big

In [19]:
samples = prepare_samples("/content/MUTargetCLEAN0513/parsed_EC7_v2/PLANTS_uniprot.csv")
samples.extend(prepare_samples("/content/MUTargetCLEAN0513/parsed_EC7_v2/ANIMALS_uniprot.csv"))
samples.extend(prepare_samples("/content/MUTargetCLEAN0513/parsed_EC7_v2/FUNGI_uniprot.csv"))
cv = pd.read_csv("/content/MUTargetCLEAN0513/parsed_EC7_v2/split/type/partition.csv")

In [20]:
df = pd.read_csv('/content/MUTargetCLEAN0513/parsed_EC7_v2/PLANTS_uniprot.csv')
filtered_df = df[df['Entry'].isin(ids_list_nosub)]
filtered_df.to_csv('/content/PLANTS_uniprot_cdhit.csv', index=False)
print(len(df))
print(len(filtered_df))

1143
1064


In [21]:
df = pd.read_csv('/content/MUTargetCLEAN0513/parsed_EC7_v2/FUNGI_uniprot.csv')
filtered_df = df[df['Entry'].isin(ids_list_nosub)]
filtered_df.to_csv('/content/FUNGI_uniprot_cdhit.csv', index=False)
print(len(df))
print(len(filtered_df))

367
355


In [22]:
df = pd.read_csv('/content/MUTargetCLEAN0513/parsed_EC7_v2/ANIMALS_uniprot.csv')
filtered_df = df[df['Entry'].isin(ids_list_nosub)]
filtered_df.to_csv('/content/ANIMALS_uniprot_cdhit.csv', index=False)
print(len(df))
print(len(filtered_df))

3773
3141


In [23]:
test_list = [
        'Q9LPZ4', 'P15330', 'P35869', 'P70278', 'Q80UP3',
        'Q8LH59', 'P19484', 'P35123', 'Q6NVF4', 'Q8NG08', 'Q9BVS4', 'Q9NRA0', 'Q9NUL5', 'Q9UBP0', 'P78953',
        'A8MR65', 'Q8S4Q6', 'Q3U0V2', 'Q96D46', 'Q9NYA1', 'Q9ULX6', 'Q9WTL8',
        'P35922', 'P46934', 'P81299', 'Q13148', 'Q6ICB0', 'Q7TPV4', 'Q8N884', 'Q99LG4', 'Q9Z207',
        'O00571', 'P52306', 'Q13015', 'Q13568', 'Q5TAQ9', 'Q8NAG6', 'Q9BZ23', 'Q9BZS1',
    ]

In [24]:
for i in test_list:
    if i not in ids_list_nosub:
        print(i)

Q80UP3
P35123
Q9WTL8
P35922
Q99LG4


In [35]:
ids_list_nosub.append('Q80UP3')
ids_list_nosub.append('P35123')
ids_list_nosub.append('Q9WTL8')
ids_list_nosub.append('P35922')
ids_list_nosub.append('Q99LG4')

In [36]:
len(ids_list_nosub)

4822

In [37]:
ids_list_nosub[-10:]

['Q4WZ11',
 'Q8J0D2',
 'Q9HGE1',
 'B3STN5',
 'B3STN7',
 'Q80UP3',
 'P35123',
 'Q9WTL8',
 'P35922',
 'Q99LG4']

In [38]:
df = pd.read_csv('/content/MUTargetCLEAN0513/parsed_EC7_v2/PLANTS_uniprot.csv')
filtered_df = df[df['Entry'].isin(ids_list_nosub)]
filtered_df.to_csv('/content/PLANTS_uniprot_cdhit.csv', index=False)
print(len(df))
print(len(filtered_df))

1143
1064


In [39]:
df = pd.read_csv('/content/MUTargetCLEAN0513/parsed_EC7_v2/FUNGI_uniprot.csv')
filtered_df = df[df['Entry'].isin(ids_list_nosub)]
filtered_df.to_csv('/content/FUNGI_uniprot_cdhit.csv', index=False)
print(len(df))
print(len(filtered_df))

367
355


In [40]:
df = pd.read_csv('/content/MUTargetCLEAN0513/parsed_EC7_v2/ANIMALS_uniprot.csv')
filtered_df = df[df['Entry'].isin(ids_list_nosub)]
filtered_df.to_csv('/content/ANIMALS_uniprot_cdhit.csv', index=False)
print(len(df))
print(len(filtered_df))

3773
3146
