8686ENABLE_RUST = os .environ .get ("SENTRY_USE_RUST_NORMALIZER" , "false" ).lower () in ("1" , "true" )
8787
8888
89+ def set_tag (data , key , value ):
90+ data ['tags' ] = [(k , v ) for k , v in data ['tags' ] if k != key ]
91+ data ['tags' ].append ((key , value ))
92+
93+
8994def get_event_metadata_compat (data , fallback_message ):
9095 """This is a fallback path to getting the event metadata. This is used
9196 by some code paths that could potentially deal with old sentry events that
@@ -412,6 +417,7 @@ def __init__(
412417 self ._auth = auth
413418 self ._key = key
414419 self ._for_store = for_store
420+ self ._normalized = False
415421
416422 def process_csp_report (self ):
417423 """Only called from the CSP report endpoint."""
@@ -464,6 +470,10 @@ def clean(d):
464470 self ._data = data
465471
466472 def normalize (self ):
473+ if self ._normalized :
474+ raise RuntimeError ('Already normalized' )
475+ self ._normalized = True
476+
467477 if ENABLE_RUST :
468478 from semaphore .processing import StoreNormalizer
469479 rust_normalizer = StoreNormalizer (
@@ -656,8 +666,6 @@ def stringify(f):
656666 data .setdefault ('fingerprint' , None )
657667 data .setdefault ('logger' , DEFAULT_LOGGER_NAME )
658668 data .setdefault ('platform' , None )
659- data .setdefault ('server_name' , None )
660- data .setdefault ('site' , None )
661669 data .setdefault ('tags' , [])
662670 data .setdefault ('transaction' , None )
663671
@@ -730,12 +738,18 @@ def stringify(f):
730738 # XXX: This will be trimmed again when inserted into tag values
731739 data ['transaction' ] = trim (data ['transaction' ], MAX_CULPRIT_LENGTH )
732740
741+ # Move some legacy data into tags
742+ site = data .pop ('site' , None )
743+ if site is not None :
744+ set_tag (data , 'site' , site )
745+ server_name = data .pop ('server_name' , None )
746+ if server_name is not None :
747+ set_tag (data , 'server_name' , server_name )
748+
733749 # Do not add errors unless there are for non store mode
734750 if not self ._for_store and not data .get ('errors' ):
735751 self ._data .pop ('errors' )
736752
737- self ._data = data
738-
739753 def should_filter (self ):
740754 '''
741755 returns (result: bool, reason: string or None)
@@ -785,16 +799,14 @@ def get_data(self):
785799 return self ._data
786800
787801 def _get_event_instance (self , project_id = None ):
788- data = self ._data . copy ()
789- event_id = data .pop ('event_id' )
790- platform = data .pop ('platform' , None )
802+ data = self ._data
803+ event_id = data .get ('event_id' )
804+ platform = data .get ('platform' )
791805
792- recorded_timestamp = data .pop ('timestamp' )
806+ recorded_timestamp = data .get ('timestamp' )
793807 date = datetime .fromtimestamp (recorded_timestamp )
794808 date = date .replace (tzinfo = timezone .utc )
795-
796- # unused
797- time_spent = data .pop ('time_spent' , None )
809+ time_spent = data .get ('time_spent' )
798810
799811 return Event (
800812 project_id = project_id or self ._project .id ,
@@ -805,11 +817,30 @@ def _get_event_instance(self, project_id=None):
805817 platform = platform
806818 )
807819
808- def get_search_message (self , data , event_metadata = None , culprit = None ):
820+ def get_culprit (self ):
821+ """Helper to calculate the default culprit"""
822+ return force_text (
823+ self ._data .get ('culprit' ) or
824+ self ._data .get ('transaction' ) or
825+ generate_culprit (self ._data , platform = self ._data ['platform' ]) or
826+ ''
827+ )
828+
829+ def get_event_type (self ):
830+ """Returns the event type."""
831+ return eventtypes .get (self ._data .get ('type' , 'default' ))(self ._data )
832+
833+ def get_search_message (self , event_metadata = None , culprit = None ):
809834 """This generates the internal event.message attribute which is used
810835 for search purposes. It adds a bunch of data from the metadata and
811836 the culprit.
812837 """
838+ if event_metadata is None :
839+ event_metadata = self .get_event_type ().get_metadata ()
840+ if culprit is None :
841+ culprit = self .get_culprit ()
842+
843+ data = self ._data
813844 message = ''
814845
815846 if 'logentry' in data :
@@ -828,7 +859,13 @@ def get_search_message(self, data, event_metadata=None, culprit=None):
828859
829860 return trim (message .strip (), settings .SENTRY_MAX_MESSAGE_LENGTH )
830861
831- def save (self , project_id , raw = False ):
862+ def save (self , project_id , raw = False , assume_normalized = False ):
863+ # Normalize if needed
864+ if not self ._normalized :
865+ if not assume_normalized :
866+ self .normalize ()
867+ self ._normalized = True
868+
832869 from sentry .tasks .post_process import index_event_tags
833870
834871 data = self ._data
@@ -859,53 +896,50 @@ def save(self, project_id, raw=False):
859896 )
860897 return event
861898
862- # pull out our top-level (non-data attr) kwargs
863- level = data .pop ('level' )
864- transaction_name = data .pop ('transaction' , None )
865- culprit = data .pop ('culprit' , None )
866- logger_name = data .pop ('logger' , None )
867- server_name = data .pop ('server_name' , None )
868- site = data .pop ('site' , None )
869- checksum = data .pop ('checksum' , None )
870- fingerprint = data .pop ('fingerprint' , None )
871- release = data .pop ('release' , None )
872- dist = data .pop ('dist' , None )
873- environment = data .pop ('environment' , None )
874- recorded_timestamp = data .get ("timestamp" )
875-
876- # old events had a small chance of having a legacy message
877- # attribute show up here. In all reality this is being coerced
878- # into logentry for more than two years at this point (2018).
879- data .pop ('message' , None )
880-
899+ # Pull out the culprit
900+ culprit = self .get_culprit ()
901+
902+ # Pull the toplevel data we're interested in
903+ level = data .get ('level' )
904+
905+ # TODO(mitsuhiko): this code path should be gone by July 2018.
906+ # This is going to be fine because no code actually still depends
907+ # on integers here. When we need an integer it will be converted
908+ # into one later. Old workers used to send integers here.
909+ if level is not None and isinstance (level , six .integer_types ):
910+ level = LOG_LEVELS [level ]
911+
912+ transaction_name = data .get ('transaction' )
913+ logger_name = data .get ('logger' )
914+ checksum = data .get ('checksum' )
915+ fingerprint = data .get ('fingerprint' )
916+ release = data .get ('release' )
917+ dist = data .get ('dist' )
918+ environment = data .get ('environment' )
919+ recorded_timestamp = data .get ('timestamp' )
920+
921+ # We need to swap out the data with the one internal to the newly
922+ # created event object
881923 event = self ._get_event_instance (project_id = project_id )
924+ self ._data = data = event .data .data
925+
882926 event ._project_cache = project
883927
884928 date = event .datetime
885929 platform = event .platform
886930 event_id = event .event_id
887- data = event .data .data
888- self ._data = None
889-
890- culprit = culprit or \
891- transaction_name or \
892- generate_culprit (data , platform = platform ) or \
893- ''
894931
895- culprit = force_text (culprit )
896932 if transaction_name :
897933 transaction_name = force_text (transaction_name )
898934
899- # convert this to a dict to ensure we're only storing one value per key
900- # as most parts of Sentry dont currently play well with multiple values
935+ # Some of the data that are toplevel attributes are duplicated
936+ # into tags (logger, level, environment, transaction). These are
937+ # different from legacy attributes which are normalized into tags
938+ # ahead of time (site, server_name).
901939 tags = dict (data .get ('tags' ) or [])
902- tags ['level' ] = LOG_LEVELS [ level ]
940+ tags ['level' ] = level
903941 if logger_name :
904942 tags ['logger' ] = logger_name
905- if server_name :
906- tags ['server_name' ] = server_name
907- if site :
908- tags ['site' ] = site
909943 if environment :
910944 tags ['environment' ] = trim (environment , MAX_TAG_VALUE_LENGTH )
911945 if transaction_name :
@@ -925,6 +959,9 @@ def save(self, project_id, raw=False):
925959
926960 if dist and release :
927961 dist = release .add_dist (dist , date )
962+ # dont allow a conflicting 'dist' tag
963+ if 'dist' in tags :
964+ del tags ['dist' ]
928965 tags ['sentry:dist' ] = dist .name
929966 else :
930967 dist = None
@@ -973,42 +1010,35 @@ def save(self, project_id, raw=False):
9731010 else :
9741011 hashes = [md5_from_hash (h ) for h in get_hashes_for_event (event )]
9751012
976- event_type = eventtypes . get ( data . get ( 'type' , 'default' ))( data )
1013+ event_type = self . get_event_type ( )
9771014 event_metadata = event_type .get_metadata ()
9781015
9791016 data ['type' ] = event_type .key
9801017 data ['metadata' ] = event_metadata
9811018
9821019 # index components into ``Event.message``
9831020 # See GH-3248
984- event .message = self .get_search_message (data , event_metadata , culprit )
1021+ event .message = self .get_search_message (event_metadata , culprit )
1022+ received_timestamp = event .data .get ('received' ) or float (event .datetime .strftime ('%s' ))
9851023
9861024 kwargs = {
9871025 'platform' : platform ,
988- 'message' : event .message
1026+ 'message' : event .message ,
1027+ 'culprit' : culprit ,
1028+ 'logger' : logger_name ,
1029+ 'level' : LOG_LEVELS_MAP .get (level ),
1030+ 'last_seen' : date ,
1031+ 'first_seen' : date ,
1032+ 'active_at' : date ,
1033+ 'data' : {
1034+ 'last_received' : received_timestamp ,
1035+ 'type' : event_type .key ,
1036+ # we cache the events metadata on the group to ensure its
1037+ # accessible in the stream
1038+ 'metadata' : event_metadata ,
1039+ },
9891040 }
9901041
991- received_timestamp = event .data .get ('received' ) or float (event .datetime .strftime ('%s' ))
992- kwargs .update (
993- {
994- 'culprit' : culprit ,
995- 'logger' : logger_name ,
996- 'level' : level ,
997- 'last_seen' : date ,
998- 'first_seen' : date ,
999- 'active_at' : date ,
1000- 'data' : {
1001- 'last_received' : received_timestamp ,
1002- 'type' :
1003- event_type .key ,
1004- # we cache the events metadata on the group to ensure its
1005- # accessible in the stream
1006- 'metadata' :
1007- event_metadata ,
1008- },
1009- }
1010- )
1011-
10121042 if release :
10131043 kwargs ['first_release' ] = release
10141044
0 commit comments