# Manual Matching Notebook for SCHEMA Dissertation

The purpose of this notebook is to manually match the schemas between Facebook and Twitter for evaluation purposes. 

The structure of this notebook is as follows.

1. Import and display of classes in both organisations.
2. Match the concepts and discuss the reasoning behind it.
3. Hypothsise whether the matching will be accurately made by each of the techniques (string, semantic or pattern matching).

This process will be repeated for both direct and indirect data transformations. 

In [1]:
import collections

def flatten(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, collections.MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

In [2]:
import json
def to_dict(obj):
    return json.loads(json.dumps(obj, default=lambda o: o.__dict__))

## 1. Import and Display of classes
### Twitter

In [3]:
from tweet import Tweet, Entities, UserMentions, Urls, Main

In [4]:
Tweet()

Tweet(retweeted=None, source=None, favorite_count=None, in_reply_to_status_id=None, id_str=None, in_reply_to_user_id=None, truncated=None, retweet_count=None, id=None, in_reply_to_status_id_str=None, possibly_sensitive=None, created_at=None, favorited=None, full_text=None, lang=None, in_reply_to_screen_name=None, in_reply_to_user_id_str=None, entities=None, display_text_range=[])

In [5]:
from twitter_profile import Profile, Interests, Followers, Following

In [6]:
Profile()

Profile(bio=None, website=None, location=None, email=None, created_via=None, username=None, account_id=None, created_at=None, account_display_name=None, phone_number=None, followers=Followers(followers=[]), following=Following(following=[]), interests=Interests(interests=[]))

### Facebook

In [7]:
from facebook_post import Post, Data, Place, Coordinates, Attachments, ExternalContext, Media, MediaMetadata

In [8]:
Post()

Post(title=None, timestamp=None, data=Data(post=None, title=None, update_timestamp=None, timestamp=None, place=Place(name=None, address=None, url=None, coordinates=Coordinates(latitude=None, longitude=None)), external_context=ExternalContext(url=None), media=Media(uri=None, title=None, description=None, creation_timestamp=None, media_metadata=MediaMetadata(photo_metadata=None))), attachments=Attachments(data=[]), tags=[])

In [9]:
Post(title=None, timestamp=None, data=Data(post=None, title=None, update_timestamp=None, timestamp=None, place=Place(name=None, address=None, url=None, coordinates=Coordinates(latitude=None, longitude=None)), external_context=ExternalContext(url=None), media=Media(uri=None, title=None, description=None, creation_timestamp=None, media_metadata=MediaMetadata(photo_metadata=None))), attachments=Attachments(data=[]), tags=[])

Post(title=None, timestamp=None, data=Data(post=None, title=None, update_timestamp=None, timestamp=None, place=Place(name=None, address=None, url=None, coordinates=Coordinates(latitude=None, longitude=None)), external_context=ExternalContext(url=None), media=Media(uri=None, title=None, description=None, creation_timestamp=None, media_metadata=MediaMetadata(photo_metadata=None))), attachments=Attachments(data=[]), tags=[])

In [10]:
from facebook_profile import Profile, Friends, Likes, Groups, Name, Location, RelationshipStatus, Date, Education, Work


In [11]:
Profile()

Profile(profile_id=None, name=Name(full_name=None, first_name=None, middle_name=None, last_name=None), date_of_birth=Date(year=None, month=None, day=None), current_city=Location(current_city=None), relationship_status=RelationshipStatus(anniversary=Date(year=None, month=None, day=None), status=None, partner=None), education_experiences=[], work_experiences=[], gender=None, phone_number=None, registration_timestamp=None, intro_bio=None, website=None, friends=Friends(friends=[]), likes=Likes(activities=[], music=[], movies=[], television=[], other=[], favourite_athletes=[], games=[], clothing=[]), emails=[])

In [12]:
Groups()

Groups(groups=[])

### Schema Adapter

In [13]:
from schema_adapter import Data as SchemaData 
from schema_adapter import TextualData, MediaData, LocationData, Profile, Relationships, Interests

In [14]:
Data()

Data(post=None, title=None, update_timestamp=None, timestamp=None, place=Place(name=None, address=None, url=None, coordinates=Coordinates(latitude=None, longitude=None)), external_context=ExternalContext(url=None), media=Media(uri=None, title=None, description=None, creation_timestamp=None, media_metadata=MediaMetadata(photo_metadata=None)))

## 2. Matching Concepts

### Posts
A post in Facebook and a Tweet in Twitter is a very similar concept. There are some nuances in concepts that are not shared in the datasets and are highlighted throughout this process, e.g. if a tweet is a retweet it is captured in the archive data whereas it is not captured as a shared post in the Facebook archive data. 

In [15]:
Post()

Post(title=None, timestamp=None, data=Data(post=None, title=None, update_timestamp=None, timestamp=None, place=Place(name=None, address=None, url=None, coordinates=Coordinates(latitude=None, longitude=None)), external_context=ExternalContext(url=None), media=Media(uri=None, title=None, description=None, creation_timestamp=None, media_metadata=MediaMetadata(photo_metadata=None))), attachments=Attachments(data=[]), tags=[])

In [16]:
Data()

Data(post=None, title=None, update_timestamp=None, timestamp=None, place=Place(name=None, address=None, url=None, coordinates=Coordinates(latitude=None, longitude=None)), external_context=ExternalContext(url=None), media=Media(uri=None, title=None, description=None, creation_timestamp=None, media_metadata=MediaMetadata(photo_metadata=None)))

In [17]:
Post(title=None, timestamp=None, data=Data(post=None, title=None, update_timestamp=None, timestamp=None, place=Place(name=None, address=None, url=None, coordinates=Coordinates(latitude=None, longitude=None)), external_context=ExternalContext(url=None), media=Media(uri=None, title=None, description=None, creation_timestamp=None, media_metadata=MediaMetadata(photo_metadata=None))), attachments=Attachments(data=[]), tags=[])

Post(title=None, timestamp=None, data=Data(post=None, title=None, update_timestamp=None, timestamp=None, place=Place(name=None, address=None, url=None, coordinates=Coordinates(latitude=None, longitude=None)), external_context=ExternalContext(url=None), media=Media(uri=None, title=None, description=None, creation_timestamp=None, media_metadata=MediaMetadata(photo_metadata=None))), attachments=Attachments(data=[]), tags=[])

In [18]:
post = Post(title="This is the title of a Facebook post", 
     timestamp=1603805068, 
     data=Data(post="Post textual data", 
               title="Title of the post", 
               update_timestamp=1603805068, 
               timestamp=1603805068, 
               place=Place(name="Trinity", 
                           address="College Green, Dublin 2",
                           url="www.tcd.ie",
                           coordinates=Coordinates(latitude=53.34413938184222, 
                                                   longitude=-6.254528684659548)), 
               external_context=ExternalContext(url="www.scss.tcd.ie"), 
               media=Media(uri="https://en.wikipedia.org/wiki/File:Long_Room_Interior,_Trinity_College_Dublin,_Ireland_-_Diliff.jpg", 
                           title="Long Room", 
                           description="Interior of Long room", 
                           creation_timestamp=1603805068, 
                           media_metadata=MediaMetadata(photo_metadata=None))), 
     attachments=Attachments(data=["Attachment1"]), 
     tags=["#tcd"])

In [19]:
UserMentions()


UserMentions(name=None, screen_name=None, id_str=None, id=None, indices=[])

In [20]:
fb_to_tweet = Tweet(retweeted = None, 
                  source = None, 
                  favorite_count = None, 
                  in_reply_to_status_id = None, 
                  id_str = None, 
                  in_reply_to_user_id = None, 
                  truncated = None, 
                  retweet_count = None, 
                  id = None, 
                  in_reply_to_status_id_str = None, 
                  possibly_sensitive = None, 
                  created_at = post.timestamp, 
                  favorited = None, 
                  full_text = post.data.post,
                  lang = None,
                  in_reply_to_screen_name = None,
                  in_reply_to_user_id_str = None,
                  entities = Entities(user_mentions = UserMentions(name = None, 
                                                                screen_name = None,
                                                                id_str = None,
                                                                id = None,
                                                                indices = []), 
                                    hashtags = post.tags, 
                                    symbols = [],
                                    urls = [post.data.place.url, post.data.media.uri, post.data.place.url, post.data.external_context.url]),
                  display_text_range = [])

In [21]:
total = 0
count = 0
for k,v in flatten(to_dict(fb_to_tweet)).items():
    total+=1
    if v == None:
        count+=1
print(1-count/total)

0.2692307692307693


  if isinstance(v, collections.MutableMapping):


In [22]:
flatten(to_dict(fb_to_tweet))

{'retweeted': None,
 'source': None,
 'favorite_count': None,
 'in_reply_to_status_id': None,
 'id_str': None,
 'in_reply_to_user_id': None,
 'truncated': None,
 'retweet_count': None,
 'id': None,
 'in_reply_to_status_id_str': None,
 'possibly_sensitive': None,
 'created_at': 1603805068,
 'favorited': None,
 'full_text': 'Post textual data',
 'lang': None,
 'in_reply_to_screen_name': None,
 'in_reply_to_user_id_str': None,
 'entities_user_mentions_name': None,
 'entities_user_mentions_screen_name': None,
 'entities_user_mentions_id_str': None,
 'entities_user_mentions_id': None,
 'entities_user_mentions_indices': [],
 'entities_hashtags': ['#tcd'],
 'entities_symbols': [],
 'entities_urls': ['www.tcd.ie',
  'https://en.wikipedia.org/wiki/File:Long_Room_Interior,_Trinity_College_Dublin,_Ireland_-_Diliff.jpg',
  'www.tcd.ie',
  'www.scss.tcd.ie'],
 'display_text_range': []}

In [23]:
tweet = Tweet(retweeted="false", 
              source="Android",
              favorite_count=13,
              in_reply_to_status_id="false",
              id_str="12345",
              in_reply_to_user_id="false",
              truncated="false",
              retweet_count=0,
              id=12345,
              in_reply_to_status_id_str="false",
              possibly_sensitive="false",
              created_at=1603805068,
              favorited="false",
              full_text="@Claire is doing her #dissertation :) www.tcd.ie",
              lang="en",
              in_reply_to_screen_name="false",
              in_reply_to_user_id_str="false",
              entities = Entities(user_mentions = [UserMentions(name = "@Claire", 
                                                                screen_name = "Claire",
                                                                id_str = "432",
                                                                id = 432,
                                                                indices = [0,6])], 
                                    hashtags = ["#dissertation"], 
                                    symbols = [":)"],
                                    urls = ["www.tcd1.ie"]),
              display_text_range = [])

In [24]:
tweet

Tweet(retweeted='false', source='Android', favorite_count=13, in_reply_to_status_id='false', id_str='12345', in_reply_to_user_id='false', truncated='false', retweet_count=0, id=12345, in_reply_to_status_id_str='false', possibly_sensitive='false', created_at=1603805068, favorited='false', full_text='@Claire is doing her #dissertation :) www.tcd.ie', lang='en', in_reply_to_screen_name='false', in_reply_to_user_id_str='false', entities=Entities(user_mentions=[UserMentions(name='@Claire', screen_name='Claire', id_str='432', id=432, indices=[0, 6])], hashtags=['#dissertation'], symbols=[':)'], urls=['www.tcd1.ie']), display_text_range=[])

In [25]:
tweet_to_fb = Post(None, tweet.created_at, 
            Data(post = tweet.full_text, 
                 title=None, 
                 update_timestamp=tweet.created_at,
                 timestamp=tweet.created_at,
                 place=Place(name=None, 
                             address=None, 
                             url=tweet.entities.urls, 
                             coordinates=Coordinates(None, None)),
                external_context=ExternalContext(url=tweet.entities.urls),
                media=Media(uri=tweet.entities.urls, 
                            title=None,
                            description=None, 
                            creation_timestamp=tweet.created_at, 
                            media_metadata=None)),
                Attachments([None]),
           [None])

In [26]:
tweet_to_fb

Post(title=None, timestamp=1603805068, data=Data(post='@Claire is doing her #dissertation :) www.tcd.ie', title=None, update_timestamp=1603805068, timestamp=1603805068, place=Place(name=None, address=None, url=['www.tcd1.ie'], coordinates=Coordinates(latitude=None, longitude=None)), external_context=ExternalContext(url=['www.tcd1.ie']), media=Media(uri=['www.tcd1.ie'], title=None, description=None, creation_timestamp=1603805068, media_metadata=None)), attachments=Attachments(data=[None]), tags=[None])

In [27]:
total = 0
count = 0
for k,v in flatten(to_dict(tweet_to_fb)).items():
    total+=1
    if v == None:
        count+=1
print(1-count/total)

0.5263157894736843


In [28]:
flatten(to_dict(tweet_to_fb))

{'title': None,
 'timestamp': 1603805068,
 'data_post': '@Claire is doing her #dissertation :) www.tcd.ie',
 'data_title': None,
 'data_update_timestamp': 1603805068,
 'data_timestamp': 1603805068,
 'data_place_name': None,
 'data_place_address': None,
 'data_place_url': ['www.tcd1.ie'],
 'data_place_coordinates_latitude': None,
 'data_place_coordinates_longitude': None,
 'data_external_context_url': ['www.tcd1.ie'],
 'data_media_uri': ['www.tcd1.ie'],
 'data_media_title': None,
 'data_media_description': None,
 'data_media_creation_timestamp': 1603805068,
 'data_media_media_metadata': None,
 'attachments_data': [None],
 'tags': [None]}

### User Profile

In [29]:
from facebook_profile import Date, Education, Friends, Profile, Groups, Likes
from facebook_profile import Profile as f_Profile

In [30]:
f_Profile()

Profile(profile_id=None, name=Name(full_name=None, first_name=None, middle_name=None, last_name=None), date_of_birth=Date(year=None, month=None, day=None), current_city=Location(current_city=None), relationship_status=RelationshipStatus(anniversary=Date(year=None, month=None, day=None), status=None, partner=None), education_experiences=[], work_experiences=[], gender=None, phone_number=None, registration_timestamp=None, intro_bio=None, website=None, friends=Friends(friends=[]), likes=Likes(activities=[], music=[], movies=[], television=[], other=[], favourite_athletes=[], games=[], clothing=[]), emails=[])

In [31]:
fb_profile = f_Profile(profile_id = "claire.farrell.1610",
    name=Name(full_name='Claire Farrell', 
                  first_name='Claire', 
                  middle_name='', 
                                 
                  last_name='Farrell'), 
        date_of_birth=Date(year=1998, 
                           month=6, 
                           day=13), 
        current_city=Location(current_city='Dublin, Ireland'), 
        relationship_status=RelationshipStatus(anniversary=Date(year=2016, 
                                                                month=12, 
                                                                day=16), 
                                               status='In a relationship', 
                                               partner='A D'), 
        education_experiences=[Education(name='Technological University Dublin', 
                                         graduated=False, 
                                         school_type='University', 
                                         start_timestamp=1472713200, 
                                         timestamp=1471872580, 
                                         concentrations=['Business Analytics']), 
                               Education(name='Scoil Iosa', 
                                         graduated=True, 
                                         school_type='High School', 
                                         start_timestamp=1316812400, 
                                         timestamp=1316812400, 
                                         concentrations=[])], 
        work_experiences=[], 
        gender='FEMALE', 
        phone_number='+35387111111', 
        registration_timestamp=1604587343,
        intro_bio = "I am Claire, I am writing a dissertation",
        website = "www.tcdf.ie",
        friends=Friends(friends=['Jane Smith', 
                                 'John Smith', 
                                 'Joe Blogs']), 
        likes=Likes(activities=['Christmas'], 
                    music=['Today FM', 'Melanie Martinez', 'DIE ANTWOORD', 'Superhumanoids', 'Hozier'], 
                    movies=['Mattress Men - The Movie', 'Taken 3 Ireland', 'Dory', 'The Hunger Games'], 
                    television=['Love/Hate Series 5', 'Game Of Thrones UK', "Mrs Brown's Boys"], 
                    other=['GoMoIreland', 'Pop Up Gaeltacht La Jolla agus Contae San Diego', 'TES - Trinity Entrepreneurial', 'The Phil'], 
                    favourite_athletes=['Tom Daley'], 
                    games=['KODE 4 ALL'], 
                    clothing=['My Cotton Drawer', 'Happy Socks', 'Penneys']), 
        emails=['cfarrellc@gmail.com'])







In [32]:
from twitter_profile import Profile as t_Profile
from twitter_profile import Interests, Followers, Following

In [33]:
twitter_profile = t_Profile(bio='I like it here', 
                            website='www.tcd.ie', 
                            location='Dublin City, Ireland', 
                            email='claire@tcd.com', 
                            created_via='web', 
                            username='cfSchema', 
                            account_id='123123', 
                            created_at='2020-11-04T11:59:98.000Z', 
                            account_display_name="Claire's Dissertation", 
                            phone_number="0871111111",
                            followers = Followers(followers=['4631863647', '20275809', '2313826940']),
                            following = Following(following=['1912395307', '1243239258007183360', '902732309442514946']),
                            interests = Interests(interests=['data-engineering', 'trinity', 'cars']))

In [34]:
t_profile_to_fb_profile = f_Profile(profile_id=twitter_profile.username,
                                    name=Name(full_name=twitter_profile.account_display_name, 
                                              first_name=None, 
                                              middle_name=None, 
                                              last_name=None), 
                                    date_of_birth=Date(year=None, 
                                                       month=None, 
                                                       day=None), 
                                    current_city=Location(current_city=twitter_profile.location), 
                                    relationship_status=RelationshipStatus(anniversary=Date(year=None, 
                                                                                            month=None, 
                                                                                            day=None), 
                                                                           status=None, 
                                                                           partner=None), 
                                    education_experiences=[None], 
                                    work_experiences=[None], 
                                    gender=None, 
                                    phone_number=twitter_profile.phone_number, 
                                    registration_timestamp=twitter_profile.created_at,
                                    intro_bio = twitter_profile.bio,
                                    website = twitter_profile.website,
                                    friends=Friends(friends=twitter_profile.followers.followers + twitter_profile.following.following), 
                                    likes=Likes(activities=None, 
                                                music=None, 
                                                movies=None, 
                                                television=None, 
                                                other=twitter_profile.interests, 
                                                favourite_athletes=None, 
                                                games=None, 
                                                clothing=None), 
                                    emails=twitter_profile.email)


In [35]:
t_profile_to_fb_profile

Profile(profile_id='cfSchema', name=Name(full_name="Claire's Dissertation", first_name=None, middle_name=None, last_name=None), date_of_birth=Date(year=None, month=None, day=None), current_city=Location(current_city='Dublin City, Ireland'), relationship_status=RelationshipStatus(anniversary=Date(year=None, month=None, day=None), status=None, partner=None), education_experiences=[None], work_experiences=[None], gender=None, phone_number='0871111111', registration_timestamp='2020-11-04T11:59:98.000Z', intro_bio='I like it here', website='www.tcd.ie', friends=Friends(friends=['4631863647', '20275809', '2313826940', '1912395307', '1243239258007183360', '902732309442514946']), likes=Likes(activities=None, music=None, movies=None, television=None, other=Interests(interests=['data-engineering', 'trinity', 'cars']), favourite_athletes=None, games=None, clothing=None), emails='claire@tcd.com')

In [36]:
fb_profile_to_t_profile = t_Profile(bio=fb_profile.intro_bio, 
                                    website=fb_profile.website, 
                                    location=fb_profile.current_city.current_city, 
                                    email=fb_profile.emails, 
                                    created_via=None, 
                                    username=fb_profile.name.full_name, 
                                    account_id=fb_profile.profile_id, 
                                    created_at=fb_profile.registration_timestamp, 
                                    account_display_name=fb_profile.name.full_name, 
                                    phone_number=fb_profile.phone_number,
                                    followers = Followers(followers=fb_profile.friends.friends),
                                    following = Following(following=fb_profile.friends.friends),
                                    interests = Interests(fb_profile.likes.activities +
                                                         fb_profile.likes.favourite_athletes +
                                                         fb_profile.likes.other + 
                                                         fb_profile.likes.clothing +
                                                         fb_profile.likes.games +
                                                         fb_profile.likes.music + 
                                                         fb_profile.likes.movies + 
                                                         fb_profile.likes.television))

In [37]:
fb_profile_to_t_profile

Profile(bio='I am Claire, I am writing a dissertation', website='www.tcdf.ie', location='Dublin, Ireland', email=['cfarrellc@gmail.com'], created_via=None, username='Claire Farrell', account_id='claire.farrell.1610', created_at=1604587343, account_display_name='Claire Farrell', phone_number='+35387111111', followers=Followers(followers=['Jane Smith', 'John Smith', 'Joe Blogs']), following=Following(following=['Jane Smith', 'John Smith', 'Joe Blogs']), interests=Interests(interests=['Christmas', 'Tom Daley', 'GoMoIreland', 'Pop Up Gaeltacht La Jolla agus Contae San Diego', 'TES - Trinity Entrepreneurial', 'The Phil', 'My Cotton Drawer', 'Happy Socks', 'Penneys', 'KODE 4 ALL', 'Today FM', 'Melanie Martinez', 'DIE ANTWOORD', 'Superhumanoids', 'Hozier', 'Mattress Men - The Movie', 'Taken 3 Ireland', 'Dory', 'The Hunger Games', 'Love/Hate Series 5', 'Game Of Thrones UK', "Mrs Brown's Boys"]))

In [38]:
total = 0
count = 0
for k,v in flatten(to_dict(fb_profile_to_t_profile)).items():
    total+=1
    if v == None:
        count+=1
print(1-count/total)

0.9230769230769231


In [39]:
total = 0
count = 0
for k,v in flatten(to_dict(t_profile_to_fb_profile)).items():
    total+=1
    if v == None:
        count+=1
print(1-count/total)

0.3870967741935484


### Schema Adapter Influence

In [45]:
from schema_adapter import Data, TextualData, MediaData, LocationData, Profile, Relationships, Interests, Content


In [41]:
Data()

Data(content=Content(text=TextualData(text=None, urls=None, hashtags=None, people=None), media=MediaData(url=None, id_str=None, caption=None, types=None, indices=[]), location=LocationData(name=None, address=None, url=None, coordinates=(0, 0))), profile=Profile(name=None, phone_number=None, emails=None, date_of_birth=None, gender=None, biography=None, language=None, location=LocationData(name=None, address=None, url=None, coordinates=(0, 0)), education=[], profession=[], interests=[]), relationships=Relationships(relations=[]), interests=Interests(interests=[]), timestamp=None)

In [42]:
fb_profile

Profile(profile_id='claire.farrell.1610', name=Name(full_name='Claire Farrell', first_name='Claire', middle_name='', last_name='Farrell'), date_of_birth=Date(year=1998, month=6, day=13), current_city=Location(current_city='Dublin, Ireland'), relationship_status=RelationshipStatus(anniversary=Date(year=2016, month=12, day=16), status='In a relationship', partner='A D'), education_experiences=[Education(name='Technological University Dublin', graduated=False, school_type='University', start_timestamp=1472713200, timestamp=1471872580, concentrations=['Business Analytics']), Education(name='Scoil Iosa', graduated=True, school_type='High School', start_timestamp=1316812400, timestamp=1316812400, concentrations=[])], work_experiences=[], gender='FEMALE', phone_number='+35387111111', registration_timestamp=1604587343, intro_bio='I am Claire, I am writing a dissertation', website='www.tcdf.ie', friends=Friends(friends=['Jane Smith', 'John Smith', 'Joe Blogs']), likes=Likes(activities=['Christm

In [43]:
post

Post(title='This is the title of a Facebook post', timestamp=1603805068, data=Data(post='Post textual data', title='Title of the post', update_timestamp=1603805068, timestamp=1603805068, place=Place(name='Trinity', address='College Green, Dublin 2', url='www.tcd.ie', coordinates=Coordinates(latitude=53.34413938184222, longitude=-6.254528684659548)), external_context=ExternalContext(url='www.scss.tcd.ie'), media=Media(uri='https://en.wikipedia.org/wiki/File:Long_Room_Interior,_Trinity_College_Dublin,_Ireland_-_Diliff.jpg', title='Long Room', description='Interior of Long room', creation_timestamp=1603805068, media_metadata=MediaMetadata(photo_metadata=None))), attachments=Attachments(data=['Attachment1']), tags=['#tcd'])

In [59]:
fb_to_schema_adapter = Data(content = Content(text=TextualData(text=post.data.post, 
                                             urls=None, 
                                             hashtags=post.tags, 
                                             people=None), 
                            media=MediaData(url=post.data.media.uri,
                                            caption = post.data.media.description,
                                            id_str=None,
                                            types=None), 
                            location=LocationData(name=post.data.place.name, 
                                                  address=post.data.place.address, 
                                                  url=post.data.place.url, 
                                                  coordinates=(post.data.place.coordinates.latitude, post.data.place.coordinates.longitude))), 
                            profile=Profile(name=fb_profile.name.full_name, 
                                            phone_number=fb_profile.phone_number, 
                                            emails=fb_profile.emails, 
                                            date_of_birth=str(fb_profile.date_of_birth.day) + '/' + str(fb_profile.date_of_birth.month) + '/' + str(fb_profile.date_of_birth.year), 
                                            gender=fb_profile.gender, 
                                            biography=fb_profile.intro_bio, 
                                            language=None, 
                                            location=LocationData(name=fb_profile.current_city, 
                                                                  address=None, 
                                                                  url=None, 
                                                                  coordinates=(0, 0)), 
                                            education=fb_profile.education_experiences, 
                                            profession=fb_profile.work_experiences),
                            relationships=Relationships(relations=fb_profile.friends), 
                            interests=Interests(interests=fb_profile.likes), 
                            timestamp=None)

In [60]:
fb_to_schema_adapter

Data(content=Content(text=TextualData(text='Post textual data', urls=None, hashtags=['#tcd'], people=None), media=MediaData(url='https://en.wikipedia.org/wiki/File:Long_Room_Interior,_Trinity_College_Dublin,_Ireland_-_Diliff.jpg', id_str=None, caption='Interior of Long room', types=None, indices=[]), location=LocationData(name='Trinity', address='College Green, Dublin 2', url='www.tcd.ie', coordinates=(53.34413938184222, -6.254528684659548))), profile=Profile(name='Claire Farrell', phone_number='+35387111111', emails=['cfarrellc@gmail.com'], date_of_birth='13/6/1998', gender='FEMALE', biography='I am Claire, I am writing a dissertation', language=None, location=LocationData(name=Location(current_city='Dublin, Ireland'), address=None, url=None, coordinates=(0, 0)), education=[Education(name='Technological University Dublin', graduated=False, school_type='University', start_timestamp=1472713200, timestamp=1471872580, concentrations=['Business Analytics']), Education(name='Scoil Iosa', gr

In [50]:
total = 0
count = 0
for k,v in flatten(to_dict(fb_to_schema_adapter)).items():
    total+=1
    if v == None:
        count+=1
print(1-count/total)

0.7837837837837838


In [51]:
tweet

Tweet(retweeted='false', source='Android', favorite_count=13, in_reply_to_status_id='false', id_str='12345', in_reply_to_user_id='false', truncated='false', retweet_count=0, id=12345, in_reply_to_status_id_str='false', possibly_sensitive='false', created_at=1603805068, favorited='false', full_text='@Claire is doing her #dissertation :) www.tcd.ie', lang='en', in_reply_to_screen_name='false', in_reply_to_user_id_str='false', entities=Entities(user_mentions=[UserMentions(name='@Claire', screen_name='Claire', id_str='432', id=432, indices=[0, 6])], hashtags=['#dissertation'], symbols=[':)'], urls=['www.tcd1.ie']), display_text_range=[])

In [52]:
twitter_profile

Profile(bio='I like it here', website='www.tcd.ie', location='Dublin City, Ireland', email='claire@tcd.com', created_via='web', username='cfSchema', account_id='123123', created_at='2020-11-04T11:59:98.000Z', account_display_name="Claire's Dissertation", phone_number='0871111111', followers=Followers(followers=['4631863647', '20275809', '2313826940']), following=Following(following=['1912395307', '1243239258007183360', '902732309442514946']), interests=Interests(interests=['data-engineering', 'trinity', 'cars']))

In [63]:
twitter_to_schema_adapter = Data(content = Content(text=TextualData(text=tweet.full_text, 
                                             urls=tweet.entities.urls, 
                                             hashtags=tweet.entities.hashtags, 
                                             people=tweet.entities.user_mentions), 
                            media=MediaData(url=None,
                                            caption = None,
                                            id_str=None,
                                            types=None), 
                            location=LocationData(name=None, 
                                                  address=None, 
                                                  url=None, 
                                                  coordinates=0, 0)), 
                            profile=Profile(name=twitter_profile.account_display_name, 
                                            phone_number=twitter_profile.phone_number, 
                                            emails=twitter_profile.email, 
                                            date_of_birth=None,
                                            gender=None, 
                                            biography=twitter_profile.bio, 
                                            language=None, 
                                            location=LocationData(name=twitter_profile.location, 
                                                                  address=None, 
                                                                  url=None, 
                                                                  coordinates=(0, 0)), 
                                            education=None, 
                                            profession=None),
                            relationships=Relationships(relations=twitter_profile.followers + twitter_profile.following), 
                            interests=Interests(interests=twitter_profile.interests), 
                            timestamp=twitter_profile.created_at)
                

SyntaxError: positional argument follows keyword argument (<ipython-input-63-cde3ea28ddd5>, line 12)

In [58]:
twitter_to_schema_adapter

Data(content=Content(text=TextualData(text='@Claire is doing her #dissertation :) www.tcd.ie', urls=['www.tcd1.ie'], hashtags=['#dissertation'], people=[UserMentions(name='@Claire', screen_name='Claire', id_str='432', id=432, indices=[0, 6])]), media=MediaData(url='https://en.wikipedia.org/wiki/File:Long_Room_Interior,_Trinity_College_Dublin,_Ireland_-_Diliff.jpg', id_str=None, caption='Interior of Long room', types=None, indices=[]), location=LocationData(name='Trinity', address='College Green, Dublin 2', url='www.tcd.ie', coordinates=(53.34413938184222, -6.254528684659548))), profile=Profile(name='Claire Farrell', phone_number='+35387111111', emails=['cfarrellc@gmail.com'], date_of_birth='13/6/1998', gender='FEMALE', biography='I am Claire, I am writing a dissertation', language=None, location=LocationData(name=Location(current_city='Dublin, Ireland'), address=None, url=None, coordinates=(0, 0)), education=[Education(name='Technological University Dublin', graduated=False, school_typ

In [55]:
total = 0
count = 0
for k,v in flatten(to_dict(twitter_to_schema_adapter)).items():
    total+=1
    if v == None:
        count+=1
print(1-count/total)

0.8108108108108107
